|
@@ -0,0 +1,361 @@
|
|
|
+# -*- coding: utf-8 -*-
|
|
|
+# @Author: wangkun
|
|
|
+# @Time: 2022/5/23
|
|
|
+import os
|
|
|
+import sys
|
|
|
+import time
|
|
|
+import requests
|
|
|
+from datetime import date, timedelta
|
|
|
+from dateutil import parser
|
|
|
+
|
|
|
+sys.path.append(os.getcwd())
|
|
|
+from common import Common
|
|
|
+from feishu_lib import Feishu
|
|
|
+
|
|
|
# Local proxy passed to requests.get() by Search.search_users_v2.
# The scheme is spelled out because requests documents that proxy URLs must
# include it; "http://" is what requests falls back to for a bare host:port.
proxies = {"http": "http://127.0.0.1:19180", "https": "http://127.0.0.1:19180"}
|
|
|
+
|
|
|
+
|
|
|
class Search:
    """Search Twitter users by key word and mirror their profiles into Feishu.

    Key words are read from sheet ``PZGpSZ`` of the ``twitter`` document and
    one row per user is kept in sheet ``db114c`` of the same document
    (columns A-U: uid, key words, 17 profile fields, create time, update time).

    All state lives on the class; every public entry point is a classmethod.
    """

    # NOTE: the three date attributes are evaluated once, when the class body
    # runs (i.e. at import time), not on every access.
    # Day before yesterday as "YYYY-MM-DD".
    before_yesterday = (date.today() - timedelta(days=2)).strftime("%Y-%m-%d")
    # Yesterday as "YYYY-MM-DD".
    yesterday = (date.today() - timedelta(days=1)).strftime("%Y-%m-%d")
    # Today as a datetime.date.
    today = date.today()

    # Pagination cursor taken from the last search response ("" = first page).
    cursor = ""

    # Seconds to wait for the search endpoint before the request is aborted.
    request_timeout = 30
    # Upper bound on the number of result pages requested per key word.
    max_pages = 200

    # ------------------------------------------------------------------
    # Key words
    # ------------------------------------------------------------------
    @staticmethod
    def _filter_words(rows):
        """Flatten sheet rows into the list of usable key words.

        Skipped cells: empty (``None``), non-text, blank, and anything
        containing ``#`` (the sheet's marker for entries to ignore).
        """
        words = []
        for row in rows or []:
            for cell in row or []:
                # Non-text cells cannot be concatenated into the query URL.
                if not isinstance(cell, str):
                    continue
                if not cell.strip() or "#" in cell:
                    continue
                words.append(cell)
        return words

    @classmethod
    def search_words(cls):
        """Return the key words stored in the Feishu key-word sheet."""
        # Short pause before hitting the Feishu API.
        time.sleep(1)
        return cls._filter_words(Feishu.get_values_batch("twitter", "PZGpSZ"))

    # ------------------------------------------------------------------
    # Updating an existing user row
    # ------------------------------------------------------------------
    @staticmethod
    def _split_words(cell):
        """Split a comma-separated key-word cell into its individual words."""
        if cell is None:
            return []
        return [word.strip() for word in str(cell).split(",") if word.strip()]

    @classmethod
    def update_user_info(cls, uid, key_word, values):
        """Refresh the sheet row of an already recorded user.

        Columns C-U of the user's row are overwritten with ``values`` followed
        by the stored create time and the current time. If ``key_word`` is not
        yet among the comma-separated words in column B it is appended there.

        :param uid: user id used to locate the row.
        :param key_word: key word whose search returned this user.
        :param values: the 17 profile values for columns C-S; not modified.
        :return: None. Errors are logged, never raised.
        """
        try:
            # A single row means the sheet holds only its header.
            if len(Feishu.get_values_batch("twitter", "db114c")) == 1:
                Common.logger().info("无用户信息")
                return

            time.sleep(1)
            row = str(Feishu.find_cell("twitter", "db114c", uid))
            user_words = Feishu.get_range_value("twitter", "db114c", "B" + row + ":B" + row)
            user_create_time = Feishu.get_range_value("twitter", "db114c", "T" + row + ":T" + row)[0]
            user_update_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time()))

            # Compare against the individual words of the cell; testing the
            # one-element list returned for the range only matched when the
            # cell held exactly one word, so words were appended repeatedly.
            known_words = cls._split_words(user_words[0] if user_words else None)
            key_word_known = key_word in known_words
            if key_word_known:
                Common.logger().info("满足条件:key_word已存在,更新当前用户信息:{}", uid)
            else:
                Common.logger().info("满足条件:key_word不存在,更新当前用户信息:{}", uid)

            # Update everything except the key words first.
            time.sleep(1)
            row_values = list(values) + [user_create_time, user_update_time]
            Common.logger().info("values:{}", row_values)
            Feishu.update_values("twitter", "db114c", "C" + row + ":U" + row, [row_values])
            Common.logger().info("用户:{}信息更新成功", uid)
            if key_word_known:
                return

            # Then append the new key word to column B.
            time.sleep(1)
            words = user_words[0] + "," + key_word if known_words else key_word
            Feishu.update_values("twitter", "db114c", "B" + row + ":B" + row, [[str(words)]])
            Common.logger().info("用户key_word:{}更新成功", key_word)
        except Exception as e:
            Common.logger().error("更新用户信息异常:{}", e)

    # ------------------------------------------------------------------
    # Building the search request
    # ------------------------------------------------------------------
    @staticmethod
    def _search_url(key_word, cursor):
        """Build the adaptive-search URL for ``key_word`` at page ``cursor``.

        A date-bounded variant used previously put
        ``(<key_word>)%20until%3A<today>%20since%3A<before_yesterday>``
        into ``q`` to restrict results to the last three days.
        """
        cursor_params = ''
        if len(cursor) > 0:
            cursor_params = '&cursor={}'.format(cursor)
        # NOTE(review): key_word is inserted unescaped, as before; a word
        # containing "&" would split the query string. Confirm whether the
        # sheet may hold pre-encoded words before adding URL quoting.
        return (
            "https://twitter.com/i/api/2/search/adaptive.json?"
            "include_profile_interstitial_type=1&include_blocking=1&include_blocked_by=1&"
            "include_followed_by=1&include_want_retweets=1&include_mute_edge=1&include_can_dm=1&"
            "include_can_media_tag=1&include_ext_has_nft_avatar=1&skip_status=1&"
            "cards_platform=Web-12&include_cards=1&include_ext_alt_text=true&include_quote_count=true&"
            "include_reply_count=1&tweet_mode=extended&include_entities=true&include_user_entities=true&"
            "include_ext_media_color=true&include_ext_media_availability=true&"
            "include_ext_sensitive_media_warning=true&include_ext_trusted_friends_metadata=true&"
            "send_error_codes=true&simple_quoted_tweet=true&"
            "q=" + key_word +
            "&result_filter=user&count=20&query_source=typed_query" + cursor_params +
            "&pc=1&spelling_corrections=1&ext=mediaStats%2ChighlightedLabel%2ChasNftAvatar%2CvoiceInfo%2"
            "Cenrichments%2CsuperFollowMetadata%2CunmentionInfo"
        )

    @staticmethod
    def _search_headers(key_word):
        """Return the request headers for a search of ``key_word``."""
        # SECURITY NOTE(review): the bearer token, session cookie and CSRF
        # token below are live credentials committed to source. They are kept
        # unchanged so the crawler keeps working, but should be moved to
        # configuration / environment variables and rotated.
        return {
            'authority': 'twitter.com',
            'accept': '*/*',
            'accept-language': 'zh-CN,zh;q=0.9',
            'authorization': 'Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejRCOuH5E6I8xnZz'
                             '4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu4FA33AGWWjCpTnA',
            'cookie': 'guest_id_marketing=v1%3A164691584304284451; guest_id_ads=v1%3A164691584304284451;'
                      ' kdt=RGGgmMi1qsAE8ap8NlKFjpksuDXG9gdD1utIeK0u; des_opt_in=Y; _gcl_au=1.1.1066'
                      '77612.1647418528;'
                      ' g_state={"i_l":0}; _gid=GA1.2.645428048.1652699425;'
                      ' personalization_id="v1_zSZMfoG7rsTlMHQYwOA39Q=="; guest_id=v1%3A165294843395764407;'
                      ' auth_token=592dbe3e68ce355f31f8343d700215030fbcd817;'
                      ' ct0=df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f7f39a447'
                      'b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c1056545ab007de55be;'
                      ' twid=u%3D1501900092303101953; external_referer=padhuUp37zg6GVaBnLSoCA0layDKYA'
                      'Tn|0|8e8t2xd8A2w%3D; mbox=PC#3ffa21b420af400ca9e94d2b1b72525c.32_0#1716385856|s'
                      'ession#047c8af8f5e34fa585b247e05c6f0a6b#1653142916; _ga=GA1.2.659870250.1646915849;'
                      ' _ga_BYKEBDM7DS=GS1.1.1653201242.12.0.1653201242.0; _ga_34PHSZMC42=GS1.1.1653201242.5'
                      '8.0.1653201242.0; lang=zh-cn; _twitter_sess=BAh7CSIKZmxhc2hJQzonQWN0aW9uQ29udHJvbGxlcjo6R'
                      'mxhc2g6OkZsYXNo%250ASGFzaHsABjoKQHVzZWR7ADoPY3JlYXRlZF9hdGwrCMQBs%252BqAAToMY3NyZl9p%250AZC'
                      'IlYjJkNWIyOTZiMzhmMGVlNWM1NDY0MmUyNDM5NTJkNjg6B2lkIiVkZjNl%250AMWNkNTY5OTUwNDdiYzgzNDE1NG'
                      'UyNjA3ZWU1NA%253D%253D--b3450fa2f7a9503c9e5e8356aff22570d29a7912; guest_id=v1%3A16479480474'
                      '0239293; guest_id_ads=v1%3A164794804740239293; guest_id_marketing=v1%3A164794804740239293;'
                      ' personalization_id="v1_/1LnzKXLyeYnZl13Ri62bg=="',
            'referer': "https://twitter.com/search?q=" + key_word + "&src=typed_query&f=user",
            'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="101", "Google Chrome";v="101"',
            'sec-ch-ua-mobile': '?0',
            'sec-ch-ua-platform': '"macOS"',
            'sec-fetch-dest': 'empty',
            'sec-fetch-mode': 'cors',
            'sec-fetch-site': 'same-origin',
            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko)'
                          ' Chrome/101.0.4951.64 Safari/537.36',
            'x-csrf-token': 'df0294bd236bf2b599c0c62906066652be2f03658877d0fe982fbb0bb645270e8485ddb2f'
                            '7f39a447b9e7ab341e244415576d8303df6302876fb00b8a5c996871bcfc2703a5d1c10565'
                            '45ab007de55be',
            'x-twitter-active-user': 'yes',
            'x-twitter-auth-type': 'OAuth2Session',
            'x-twitter-client-language': 'zh-cn',
        }

    # ------------------------------------------------------------------
    # Turning one user object into sheet values
    # ------------------------------------------------------------------
    @staticmethod
    def _count(userinfo, key):
        """Return ``userinfo[key]`` as an int, or ``"null"`` when it is absent.

        Calling ``int()`` on the ``"null"`` placeholder used to raise and
        abort the processing of the whole result page.
        """
        value = userinfo.get(key)
        if value is None:
            return "null"
        return int(value)

    @staticmethod
    def _display_url(userinfo):
        """Return the profile link of a user, or ``"null"`` if there is none.

        The first entry of ``entities.url.urls`` is used, preferring its
        ``display_url``, then ``expanded_url``, then ``url``.
        """
        url_entity = (userinfo.get("entities") or {}).get("url") or {}
        urls = url_entity.get("urls") or []
        if not urls:
            return "null"
        first = urls[0]
        for key in ("display_url", "expanded_url", "url"):
            if key in first:
                return first[key]
        return "null"

    @classmethod
    def _profile_values(cls, userinfo):
        """Return the 17 profile values written to columns C-S of a user row.

        Fields missing from ``userinfo`` are represented by the string
        ``"null"``.
        """
        screen_name = userinfo.get("screen_name", "null")
        if screen_name == "null":
            person_url = "null"
        else:
            person_url = "https://twitter.com/" + screen_name

        if "created_at" in userinfo:
            created_at = str(parser.parse(userinfo["created_at"]).strftime("%Y/%m/%d %H:%M:%S"))
        else:
            created_at = "null"

        return [
            str(userinfo.get("name", "null")),
            str(screen_name),
            str(person_url),
            str(userinfo.get("description", "null")),
            str(userinfo.get("location", "null")),
            cls._count(userinfo, "friends_count"),
            cls._count(userinfo, "followers_count"),
            cls._count(userinfo, "favourites_count"),
            cls._count(userinfo, "listed_count"),
            cls._count(userinfo, "statuses_count"),
            cls._count(userinfo, "media_count"),
            str(cls._display_url(userinfo)),
            str(created_at),
            str(userinfo.get("profile_image_url", "null")),
            str(userinfo.get("profile_banner_url", "null")),
            str(userinfo.get("ext_has_nft_avatar", "null")),
            str(userinfo.get("verified", "null")),
        ]

    @classmethod
    def _store_user(cls, uid, key_word, userinfo, known_cells):
        """Insert or refresh the sheet row of one user from a search result.

        ``known_cells`` is the flattened content of the user sheet; the uid
        is appended to it when a new row is inserted.
        """
        # Filter out invalid users.
        if uid == "" or uid == "null":
            Common.logger().info("无效用户")
            return

        profile = cls._profile_values(userinfo)

        # The user is already present in the sheet.
        if uid in known_cells:
            Common.logger().info("用户已存在:{}", uid)
            time.sleep(1)
            cls.update_user_info(uid, key_word, profile)
            return

        # The user is not in the sheet yet: insert a row below the header
        # and write the full record into it.
        Common.logger().info("添加用户:{} 至云文档", userinfo.get("name", "null"))
        create_time = time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(time.time()))
        update_time = ""

        # Sheet: https://w42nne6hzg.feishu.cn/sheets/shtcn6BYfYuqegIP13ORB6rI2dh?sheet=db114c
        Feishu.insert_columns("twitter", "db114c", "ROWS", 1, 2)
        values = [[str(uid), str(key_word)] + profile + [str(create_time), str(update_time)]]
        time.sleep(1)
        Feishu.update_values("twitter", "db114c", "A2:U2", values)
        known_cells.append(uid)
        Common.logger().info("添加成功\n")

    # ------------------------------------------------------------------
    # Searching
    # ------------------------------------------------------------------
    @classmethod
    def search_users_v2(cls, key_word):
        """Fetch one page of user results for ``key_word`` and store them.

        The page is selected by ``cls.cursor``, which is advanced to the
        cursor contained in the response.

        :param key_word: search term.
        :return: ``True`` when the page contained users, ``False`` when it
            contained none, ``None`` when the request or parsing failed (the
            error is logged, never raised).
        """
        try:
            # A timeout keeps a stalled proxy/connection from hanging the run.
            response = requests.get(url=cls._search_url(key_word, cls.cursor),
                                    headers=cls._search_headers(key_word),
                                    proxies=proxies,
                                    timeout=cls.request_timeout)
            payload = response.json()
            cls.cursor = payload["timeline"]["instructions"][-1]["addEntries"][
                "entries"][-1]["content"]["operation"]["cursor"]["value"]
            users = payload["globalObjects"]["users"]
            if not users:
                Common.logger().info("本次请求无数据返回")
                return False

            # Read the sheet once per page instead of once per user.
            known_cells = [cell
                           for row in Feishu.get_values_batch("twitter", "db114c")
                           for cell in row]
            for userinfo in users.values():
                uid = userinfo.get("id_str", "null")
                try:
                    cls._store_user(uid, key_word, userinfo, known_cells)
                except Exception as e:
                    # One malformed user must not discard the rest of the page.
                    Common.logger().error("处理用户:{}异常:{}", uid, e)
            return True

        except Exception as e:
            Common.logger().error("搜索用户异常:{}", e)

    @classmethod
    def search_users_by_key_words(cls):
        """Run the paged user search for every key word in the key-word sheet."""
        for key_word in cls.search_words():
            Common.logger().info("根据关键词:{} 搜索用户", key_word)
            cls.cursor = ''
            time.sleep(1)
            start = time.time()
            for page in range(cls.max_pages):
                Common.logger().info("正在请求第{}页", page + 1)
                # Stop paging once a page comes back empty; a failed request
                # (None) keeps the previous behaviour of moving on.
                if cls.search_users_v2(key_word) is False:
                    break
            end_time = time.time()
            Common.logger().info("本次根据{}关键词搜索, 共耗时:{}秒", key_word, int(end_time - start))
|
|
|
+
|
|
|
+
|
|
|
if __name__ == "__main__":
    # Script entry point: run the user search for every configured key word.
    Search.search_users_by_key_words()
|