1 年前 · 1cc8bd53c5
--- a/gongzhongxinhao/gongzhongxinhao/gongzhongxinhao_author.py
+++ b/gongzhongxinhao/gongzhongxinhao/gongzhongxinhao_author.py
@@ -23,13 +23,12 @@ class GZXHAuthor:
 
				     """
			
 
				     公众新号账号爬虫
			
 
				     """
			
 
				-    def __init__(self, platform, mode, rule_dict, user_dict, env, url_id):
			
 
				+    def __init__(self, platform, mode, rule_dict, user_dict, env):
			
 
				         self.platform = platform
			
 
				         self.mode = mode
			
 
				         self.rule_dict = rule_dict
			
 
				         self.user_dict = user_dict
			
 
				         self.env = env
			
 
				-        self.url_id = url_id
			
 
				         self.download_cnt = 0
			
 
				         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
			
 
				 
			
@@ -39,7 +38,7 @@ class GZXHAuthor:
 
				                 platform=self.platform,
			
 
				                 mode=self.mode,
			
 
				                 env=self.env,
			
 
				-                message="开始抓取公众新号: {}".format(self.url_id["name"]),
			
 
				+                message="开始抓取公众新号: {}".format(self.user_dict['link']),
			
 
				             )
			
 
				             try:
			
 
				                 self.get_videoList()
			
@@ -49,14 +48,14 @@ class GZXHAuthor:
 
				                     platform=self.platform,
			
 
				                     mode=self.mode,
			
 
				                     env=self.env,
			
 
				-                    message=f"抓取公众新号: {self.url_id['name']} 时异常,异常信息: {e}",
			
 
				+                    message=f"抓取公众新号: {self.user_dict['link']} 时异常,异常信息: {e}",
			
 
				                 )
			
 
				             AliyunLogger.logging(
			
 
				                 code="1004",
			
 
				                 platform=self.platform,
			
 
				                 mode=self.mode,
			
 
				                 env=self.env,
			
 
				-                message="抓取公众新号: {}".format(self.url_id["name"]),
			
 
				+                message="抓取公众新号: {}".format(self.user_dict['link']),
			
 
				             )
			
 
				 
			
 
				             # 获取腾讯视频下载链接
			
@@ -145,7 +144,7 @@ class GZXHAuthor:
 
				     def get_videoList(self):
			
 
				         mq = MQ(topic_name="topic_crawler_etl_" + self.env)
			
 
				         time.sleep(1)
			
 
				-        wechat_gh = self.get_wechat_gh(self.url_id["url"])
			
 
				+        wechat_gh = self.get_wechat_gh(self.user_dict['link'])
			
 
				         if None == wechat_gh:
			
 
				 
			
 
				             AliyunLogger.logging(
			
@@ -153,7 +152,7 @@ class GZXHAuthor:
 
				                 platform=self.platform,
			
 
				                 mode=self.mode,
			
 
				                 env=self.env,
			
 
				-                message=f"获取用主页为空{self.url_id['name']}",
			
 
				+                message=f"获取用主页为空{self.user_dict['link']}",
			
 
				             )
			
 
				             return
			
 
				         time.sleep(1)
			
@@ -221,7 +220,6 @@ class GZXHAuthor:
 
				         date_time_obj = datetime.strptime(publish_time_str, date_format)
			
 
				         publish_time_stamp = int(date_time_obj.timestamp())
			
 
				         article_url = article.get("url", "")
			
 
				-        our_user = random.choice(self.user_dict)
			
 
				         video_url = self.get_video_url(article_url)
			
 
				         video_dict = {
			
 
				             "user_name": user_name,
			
@@ -237,7 +235,7 @@ class GZXHAuthor:
 
				             "comment_cnt": 0,
			
 
				             "like_cnt": 0,
			
 
				             "share_cnt": 0,
			
 
				-            "user_id": our_user["uid"],
			
 
				+            "user_id": self.user_dict["uid"],
			
 
				             "cover_url": article.get("head_pic", ""),
			
 
				             "video_url": video_url,
			
 
				             "width": 0,
			
@@ -279,8 +277,7 @@ class GZXHAuthor:
 
				                 publish_time_str,
			
 
				                 video_url,
			
 
				                 article.get("head_pic", ""),
			
 
				-                self.url_id["name"],
			
 
				-                self.url_id["url"]
			
 
				+                self.user_dict['link']
			
 
				 
			
 
				             ]]
			
 
				             Feishu.insert_columns('gongzhonghao', 'gongzhonghao', "9QU7wE", "ROWS", 1, 2)
			
--- a/gongzhongxinhao/gongzhongxinhao_main/run_gzxh_author.py
+++ b/gongzhongxinhao/gongzhongxinhao_main/run_gzxh_author.py
@@ -79,15 +79,6 @@ def main(log_type, crawler, topic_name, group_id, env):
 
				                     env=env,
			
 
				                     message=f"抓取规则:{rule_dict}\n",
			
 
				                 )
			
 
				-                audio_type = Feishu.get_values_batch("prod", "gongzhonghao", "QsTym9")
			
 
				-                url_list = []
			
 
				-                for row in audio_type[1:]:
			
 
				-                    name = row[1]
			
 
				-                    url = row[4]
			
 
				-                    number = {"name": name, "url": url}
			
 
				-                    if url:
			
 
				-                        url_list.append(number)
			
 
				-
			
 
				                 # 解析 user_list
			
 
				                 task_id = task_dict["id"]
			
 
				                 select_user_sql = (
			
@@ -103,7 +94,7 @@ def main(log_type, crawler, topic_name, group_id, env):
 
				                     env=env,
			
 
				                     message="开始抓取"
			
 
				                 )
			
 
				-                for url_id in url_list:
			
 
				+                for user_dict in user_list:
			
 
				                     time.sleep(random.randint(1, 5))
			
 
				                     try:
			
 
				                         AliyunLogger.logging(
			
@@ -111,16 +102,15 @@ def main(log_type, crawler, topic_name, group_id, env):
 
				                             platform=crawler,
			
 
				                             mode=log_type,
			
 
				                             env=env,
			
 
				-                            message="开始抓取公众新号{}".format(url_id["name"]),
			
 
				+                            message="开始抓取公众新号{}".format(user_dict),
			
 
				                         )
			
 
				                         # 初始化
			
 
				                         GZXH = GZXHAuthor(
			
 
				                             platform=crawler,
			
 
				                             mode=log_type,
			
 
				                             rule_dict=rule_dict,
			
 
				-                            user_dict=user_list,
			
 
				+                            user_dict=user_dict,
			
 
				                             env=env,
			
 
				-                            url_id=url_id,
			
 
				                         )
			
 
				                         GZXH.get_account_videos()
			
 
				                         AliyunLogger.logging(
			
@@ -128,7 +118,7 @@ def main(log_type, crawler, topic_name, group_id, env):
 
				                             platform=crawler,
			
 
				                             mode=log_type,
			
 
				                             env=env,
			
 
				-                            message="完成抓取公众新号{}".format(url_id["name"]),
			
 
				+                            message="完成抓取公众新号{}".format(user_dict),
			
 
				                         )
			
 
				                     except Exception as e:
			
 
				                         AliyunLogger.logging(
			
@@ -136,7 +126,7 @@ def main(log_type, crawler, topic_name, group_id, env):
 
				                             platform=crawler,
			
 
				                             mode=log_type,
			
 
				                             env=env,
			
 
				-                            message="抓取公众新号{}出现问题， 报错为{}".format(url_id["name"], e),
			
 
				+                            message="抓取公众新号{}出现问题， 报错为{}".format(user_dict, e),
			
 
				                         )