|
@@ -23,13 +23,12 @@ class GZXHAuthor:
|
|
|
"""
|
|
|
公众新号账号爬虫
|
|
|
"""
|
|
|
- def __init__(self, platform, mode, rule_dict, user_dict, env, url_id):
|
|
|
+ def __init__(self, platform, mode, rule_dict, user_dict, env):
|
|
|
self.platform = platform
|
|
|
self.mode = mode
|
|
|
self.rule_dict = rule_dict
|
|
|
self.user_dict = user_dict
|
|
|
self.env = env
|
|
|
- self.url_id = url_id
|
|
|
self.download_cnt = 0
|
|
|
self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
|
|
|
@@ -39,7 +38,7 @@ class GZXHAuthor:
|
|
|
platform=self.platform,
|
|
|
mode=self.mode,
|
|
|
env=self.env,
|
|
|
- message="开始抓取公众新号: {}".format(self.url_id["name"]),
|
|
|
+ message="开始抓取公众新号: {}".format(self.user_dict['link']),
|
|
|
)
|
|
|
try:
|
|
|
self.get_videoList()
|
|
@@ -49,14 +48,14 @@ class GZXHAuthor:
|
|
|
platform=self.platform,
|
|
|
mode=self.mode,
|
|
|
env=self.env,
|
|
|
- message=f"抓取公众新号: {self.url_id['name']} 时异常,异常信息: {e}",
|
|
|
+ message=f"抓取公众新号: {self.user_dict['link']} 时异常,异常信息: {e}",
|
|
|
)
|
|
|
AliyunLogger.logging(
|
|
|
code="1004",
|
|
|
platform=self.platform,
|
|
|
mode=self.mode,
|
|
|
env=self.env,
|
|
|
- message="抓取公众新号: {}".format(self.url_id["name"]),
|
|
|
+ message="抓取公众新号: {}".format(self.user_dict['link']),
|
|
|
)
|
|
|
|
|
|
# 获取腾讯视频下载链接
|
|
@@ -145,7 +144,7 @@ class GZXHAuthor:
|
|
|
def get_videoList(self):
|
|
|
mq = MQ(topic_name="topic_crawler_etl_" + self.env)
|
|
|
time.sleep(1)
|
|
|
- wechat_gh = self.get_wechat_gh(self.url_id["url"])
|
|
|
+ wechat_gh = self.get_wechat_gh(self.user_dict['link'])
|
|
|
if None == wechat_gh:
|
|
|
|
|
|
AliyunLogger.logging(
|
|
@@ -153,7 +152,7 @@ class GZXHAuthor:
|
|
|
platform=self.platform,
|
|
|
mode=self.mode,
|
|
|
env=self.env,
|
|
|
- message=f"获取用主页为空{self.url_id['name']}",
|
|
|
+ message=f"获取用主页为空{self.user_dict['link']}",
|
|
|
)
|
|
|
return
|
|
|
time.sleep(1)
|
|
@@ -221,7 +220,6 @@ class GZXHAuthor:
|
|
|
date_time_obj = datetime.strptime(publish_time_str, date_format)
|
|
|
publish_time_stamp = int(date_time_obj.timestamp())
|
|
|
article_url = article.get("url", "")
|
|
|
- our_user = random.choice(self.user_dict)
|
|
|
video_url = self.get_video_url(article_url)
|
|
|
video_dict = {
|
|
|
"user_name": user_name,
|
|
@@ -237,7 +235,7 @@ class GZXHAuthor:
|
|
|
"comment_cnt": 0,
|
|
|
"like_cnt": 0,
|
|
|
"share_cnt": 0,
|
|
|
- "user_id": our_user["uid"],
|
|
|
+ "user_id": self.user_dict["uid"],
|
|
|
"cover_url": article.get("head_pic", ""),
|
|
|
"video_url": video_url,
|
|
|
"width": 0,
|
|
@@ -279,8 +277,7 @@ class GZXHAuthor:
|
|
|
publish_time_str,
|
|
|
video_url,
|
|
|
article.get("head_pic", ""),
|
|
|
- self.url_id["name"],
|
|
|
- self.url_id["url"]
|
|
|
+ self.user_dict['link']
|
|
|
|
|
|
]]
|
|
|
Feishu.insert_columns('gongzhonghao', 'gongzhonghao', "9QU7wE", "ROWS", 1, 2)
|