|
@@ -36,24 +36,19 @@ class ShiPinHaoAccount:
|
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
|
|
|
|
def get_token_from_mysql(self):
|
|
|
- # select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
|
|
|
- # # print(select_sql)
|
|
|
- # configs = MysqlHelper.get_values(
|
|
|
- # log_type=self.mode,
|
|
|
- # crawler=self.platform,
|
|
|
- # sql=select_sql,
|
|
|
- # env=self.env,
|
|
|
- # machine="",
|
|
|
- # )
|
|
|
- # print(configs)
|
|
|
- # token_config = configs[0][0]
|
|
|
- # token_info = json.loads(token_config)
|
|
|
- # self.token = token_info["token"]
|
|
|
- # self.cookie = token_info["cookie"]
|
|
|
- self.token = "766484754"
|
|
|
- self.cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3524986952|1|fgp|0; uuid=a76c16bf749aaf6418aa610ad5c6e66c; rand_info=CAESIDhWIfyhucI9xQkQm/2xYzaHtaGjRUbHeNKgSt4b382C; slave_bizuin=3930572231; data_bizuin=3930572231; bizuin=3930572231; data_ticket=k3o3TmbxDq450TMRpBL2zW+f1onbHFg7G4/9iLi/jlp1zyWQtmpjxFouT+/kRE1e; slave_sid=TndTREg5TW9MaFUxRllkaVFacXh6bVhFSEhpSEVRNUc2RWtBbnJRZmdxZzNxaUpOc29oRGJ1RjhFZm9jNXZ3Q1JzUzN3elFDYlVjZTEyN1YyWm9nOGhsUW9sNTFEUEtDRmo1Z0hzZjA1ZjhibXg0YzVrOE91N3ZOZWVqT3UxT0FSN3lsNG9SNTNNdEE2VWNC; slave_user=gh_deef7ad59a83; xid=9bd5b038d83164cbfa24bcf224bc9172; _clsk=bqf6jh|1699929305392|6|1|mp.weixin.qq.com/weheat-agent/payload/record"
|
|
|
- print(self.token)
|
|
|
- print(self.cookie)
|
|
|
+ select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
|
|
|
+ # print(select_sql)
|
|
|
+ configs = MysqlHelper.get_values(
|
|
|
+ log_type=self.mode,
|
|
|
+ crawler=self.platform,
|
|
|
+ sql=select_sql,
|
|
|
+ env=self.env,
|
|
|
+ machine="",
|
|
|
+ )
|
|
|
+ token_config = configs[0][0]
|
|
|
+ token_info = json.loads(token_config)
|
|
|
+ self.token = token_info["token"]
|
|
|
+ self.cookie = token_info["cookie"]
|
|
|
|
|
|
def get_history_id(self):
|
|
|
"""
|
|
@@ -67,9 +62,9 @@ class ShiPinHaoAccount:
|
|
|
env=self.env,
|
|
|
machine="",
|
|
|
)
|
|
|
- print(name_id)
|
|
|
if name_id:
|
|
|
- return name_id[0]
|
|
|
+ # return False
|
|
|
+ return name_id[0][0]
|
|
|
else:
|
|
|
return False
|
|
|
|
|
@@ -79,6 +74,9 @@ class ShiPinHaoAccount:
|
|
|
if history_id:
|
|
|
return history_id
|
|
|
else:
|
|
|
+ self.get_token_from_mysql()
|
|
|
+ print(self.token)
|
|
|
+ print(self.cookie)
|
|
|
url = "https://mp.weixin.qq.com/cgi-bin/videosnap"
|
|
|
params = {
|
|
|
"action": "search",
|
|
@@ -105,7 +103,7 @@ class ShiPinHaoAccount:
|
|
|
response = requests.request("GET", url, headers=headers, params=params)
|
|
|
self.token_count += 1
|
|
|
user_list = response.json()
|
|
|
- print(user_list)
|
|
|
+ # print(user_list)
|
|
|
user_list = user_list["acct_list"]
|
|
|
target_user = find_target_user(name=self.account_name, user_list=user_list)
|
|
|
# 写入 MySql 数据库
|
|
@@ -125,27 +123,15 @@ class ShiPinHaoAccount:
|
|
|
|
|
|
def get_account_videos(self):
|
|
|
# 一个账号最多抓 30 条数据
|
|
|
- self.get_token_from_mysql()
|
|
|
user_id = self.get_account_id()
|
|
|
- print("ljh", user_id)
|
|
|
- print(type(user_id))
|
|
|
if user_id:
|
|
|
+ print(user_id)
|
|
|
url = "https://mp.weixin.qq.com/cgi-bin/videosnap"
|
|
|
- headers = {
|
|
|
- "authority": "mp.weixin.qq.com",
|
|
|
- "accept": "*/*",
|
|
|
- "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
|
|
|
- "cookie": self.cookie,
|
|
|
- "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN".format(
|
|
|
- self.token
|
|
|
- ),
|
|
|
- "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
|
|
- "x-requested-with": "XMLHttpRequest",
|
|
|
- }
|
|
|
buffer = "" # 翻页指示器
|
|
|
while True:
|
|
|
if self.download_cnt >= 30:
|
|
|
return
|
|
|
+ self.get_token_from_mysql()
|
|
|
params = {
|
|
|
"action": "get_feed_list",
|
|
|
"username": user_id,
|
|
@@ -153,11 +139,21 @@ class ShiPinHaoAccount:
|
|
|
"count": "15",
|
|
|
"scene": "1",
|
|
|
"token": self.token,
|
|
|
- # "token": "123456",
|
|
|
"lang": "zh_CN",
|
|
|
"f": "json",
|
|
|
"ajax": "1",
|
|
|
}
|
|
|
+ headers = {
|
|
|
+ "authority": "mp.weixin.qq.com",
|
|
|
+ "accept": "*/*",
|
|
|
+ "accept-language": "en,zh-CN;q=0.9,zh;q=0.8",
|
|
|
+ "cookie": self.cookie,
|
|
|
+ "referer": "https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN".format(
|
|
|
+ self.token
|
|
|
+ ),
|
|
|
+ "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36",
|
|
|
+ "x-requested-with": "XMLHttpRequest",
|
|
|
+ }
|
|
|
response = requests.request("GET", url, headers=headers, params=params)
|
|
|
self.token_count += 1
|
|
|
res_json = response.json()
|
|
@@ -181,9 +177,6 @@ class ShiPinHaoAccount:
|
|
|
buffer = res_json["last_buff"]
|
|
|
for obj in res_json["list"]:
|
|
|
print("扫描到一条视频", self.token_count)
|
|
|
- # repeat_flag = self.process_video_obj(obj)
|
|
|
- # if not repeat_flag:
|
|
|
- # return
|
|
|
try:
|
|
|
print("扫描到一条视频")
|
|
|
repeat_flag = self.process_video_obj(obj)
|
|
@@ -238,20 +231,18 @@ class ShiPinHaoAccount:
|
|
|
video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
print(video_dict)
|
|
|
print("成功发送 MQ 至 ETL")
|
|
|
- self.mq.send_msg(video_dict)
|
|
|
+ # self.mq.send_msg(video_dict)
|
|
|
self.download_cnt += 1
|
|
|
return True
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
|
|
|
- # temp_token = "2080949641"
|
|
|
- # temp_cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3930572231|1|fgk|0; uuid=6562bbd8859230ce4120dfa063c76997; rand_info=CAESIGAatjSIjvxVJVDxRDN7F/CNFWMifvAVqje98rd++8UY; slave_bizuin=3236647229; data_bizuin=3236647229; bizuin=3236647229; data_ticket=qm3i6jRhObs1yKHttGh0gVI02Mz7FTPfatn0RMLdaWyD7Ukcokm5Dc3mmYLQUZPg; slave_sid=UWxjZnhBREZRRTNKZ3dYZTlYRE9Db2lxQUhOM3lZUlRoMkV0MG1wdVVudGpQTWxnVkxzYW5pV2c3NjB3bnAyQ2lPaXBBVVRPazEybWtKSVEzTnUyazZ6WEJsdnFaWWVDaUFrM3pTTXRkeUNJS3RNVTc2NFRBWkZiVGQzYllacEFRalBBZ2tXZlltblJYS2VS; slave_user=gh_d284c09295eb; xid=cb96e6ba4b4960d74a22869b1bb21406; _clsk=z77guf|1699532621466|4|1|mp.weixin.qq.com/weheat-agent/payload/record"
|
|
|
SP = ShiPinHaoAccount(
|
|
|
platform="shipinhao",
|
|
|
mode="author",
|
|
|
user_dict={"uid": "123456", "link": "树树读书1014", "user_id": "1234565"},
|
|
|
rule_dict={},
|
|
|
- env="dev",
|
|
|
+ env="prod",
|
|
|
)
|
|
|
|
|
|
SP.get_account_videos()
|