Преглед на файлове

Merge remote-tracking branch 'origin/master' into lxr_etl_20230620

ehlxr преди 2 години
родител
ревизия
11a6c18617
променени са 25 файла, в които са добавени 417 реда и са изтрити 150 реда
  1. 1 1
      benshanzhufu/benshanzhufu_main/run_bszf_recommend.py
  2. 1 1
      douyin/douyin_main/run_dy_author.py
  3. 1 1
      douyin/douyin_main/run_dy_recommend.py
  4. 45 20
      gongzhonghao/gongzhonghao_author/gongzhonghao1_author.py
  5. 42 16
      gongzhonghao/gongzhonghao_author/gongzhonghao1_author_create_user.py
  6. 46 24
      gongzhonghao/gongzhonghao_author/gongzhonghao2_author.py
  7. 46 25
      gongzhonghao/gongzhonghao_author/gongzhonghao3_author.py
  8. 47 24
      gongzhonghao/gongzhonghao_author/gongzhonghao4_author.py
  9. 47 23
      gongzhonghao/gongzhonghao_author/gongzhonghao5_author.py
  10. 21 1
      gongzhonghao/gongzhonghao_main/run_gzh1_author.py
  11. 21 1
      gongzhonghao/gongzhonghao_main/run_gzh2_author.py
  12. 21 1
      gongzhonghao/gongzhonghao_main/run_gzh3_author.py
  13. 21 1
      gongzhonghao/gongzhonghao_main/run_gzh4_author.py
  14. 21 1
      gongzhonghao/gongzhonghao_main/run_gzh5_author.py
  15. 26 0
      gongzhonghao/gongzhonghao_main/run_gzh_author_dev.py
  16. 1 1
      kuaishou/kuaishou_main/run_ks_author.py
  17. 1 1
      kuaishou/kuaishou_main/run_ks_recommend.py
  18. 1 1
      suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_ssnnyfq_recommend.py
  19. 1 1
      xiaoniangao/xiaoniangao_main/run_xng_author.py
  20. 1 1
      xiaoniangao/xiaoniangao_main/run_xng_hour.py
  21. 1 1
      xiaoniangao/xiaoniangao_main/run_xng_play.py
  22. 1 1
      xigua/xigua_main/run_xg_author.py
  23. 1 1
      xigua/xigua_main/run_xg_recommend.py
  24. 1 1
      xigua/xigua_main/run_xg_search.py
  25. 1 1
      xigua/xigua_recommend/xigua_recommend_scheduling.py

+ 1 - 1
benshanzhufu/benshanzhufu_main/run_bszf_recommend.py

@@ -20,7 +20,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
douyin/douyin_main/run_dy_author.py

@@ -18,7 +18,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
douyin/douyin_main/run_dy_recommend.py

@@ -19,7 +19,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 45 - 20
gongzhonghao/gongzhonghao_author/gongzhonghao1_author.py

@@ -69,12 +69,14 @@ class GongzhonghaoAuthor1:
         }
         for k, v in our_user_dict.items():
             Common.logger(log_type, crawler).info(f"{k}:{v}")
+        Common.logging(log_type, crawler, env, f'our_user_dict:{our_user_dict}')
         return our_user_dict
 
     # 获取用户 fakeid
     @classmethod
     def get_user_info(cls, log_type, crawler, wechat_name, env):
         Common.logger(log_type, crawler).info(f"获取站外用户信息:{wechat_name}")
+        Common.logging(log_type, crawler, env, f"获取站外用户信息:{wechat_name}")
         while True:
             token_dict = cls.get_token(log_type, crawler, env)
             url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
@@ -110,25 +112,25 @@ class GongzhonghaoAuthor1:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             user_info_dict = {'user_name': r.json()["list"][0]["nickname"],
                               'user_id': r.json()["list"][0]["fakeid"],
@@ -224,34 +226,35 @@ class GongzhonghaoAuthor1:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "invalid args" and r.json()["base_resp"]["ret"] == 200002:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"公众号:{user_dict['user_name']}\n抓取异常, 请检查该公众号\n")
                 return
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, '没有更多视频了\n')
                 return
             else:
                 begin += 5
@@ -281,13 +284,16 @@ class GongzhonghaoAuthor1:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f'video_dict:{video_dict}')
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
                             Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -296,12 +302,15 @@ class GongzhonghaoAuthor1:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -311,7 +320,9 @@ class GongzhonghaoAuthor1:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
     @classmethod
@@ -331,11 +342,13 @@ class GongzhonghaoAuthor1:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -343,11 +356,15 @@ class GongzhonghaoAuthor1:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -357,6 +374,7 @@ class GongzhonghaoAuthor1:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -414,8 +432,10 @@ class GongzhonghaoAuthor1:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -438,6 +458,7 @@ class GongzhonghaoAuthor1:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
@@ -447,6 +468,7 @@ class GongzhonghaoAuthor1:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             len_sheet = len(user_sheet)
@@ -460,6 +482,7 @@ class GongzhonghaoAuthor1:
                                           i=i,
                                           env=env)
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                 try:
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
@@ -467,9 +490,11 @@ class GongzhonghaoAuthor1:
                                       user_dict=user_dict,
                                       env=env)
                     Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                    Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                     time.sleep(60)
                 except Exception as e:
                     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                    Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
             break
 
 

+ 42 - 16
gongzhonghao/gongzhonghao_author/gongzhonghao1_author_create_user.py

@@ -53,6 +53,7 @@ class GongzhonghaoAuthor1:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             user_list = []
@@ -66,6 +67,7 @@ class GongzhonghaoAuthor1:
                 if wechat_name is None or wechat_name.strip() == "" or wechat_name.replace(" ", "") == "":
                     wechat_name = user_name
                 Common.logger(log_type, crawler).info(f"befor_wechat_name:{type(wechat_name)}, {wechat_name}")
+                Common.logging(log_type, crawler, env, f"befor_wechat_name:{type(wechat_name)}, {wechat_name}")
                 our_uid = user_sheet[i][5]
                 our_user_link = user_sheet[i][6]
                 user_info_dict = cls.get_user_info(log_type=log_type, crawler=crawler, wechat_name=wechat_name, env=env)
@@ -78,6 +80,7 @@ class GongzhonghaoAuthor1:
                 tag5 = user_sheet[i][11]
                 tag6 = user_sheet[i][12]
                 Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息")
+                Common.logging(log_type, crawler, env, f"正在更新 {user_name} 用户信息")
                 if out_uid is None or our_uid is None:
                     # 用来创建our_id的信息
                     user_dict = {
@@ -89,6 +92,7 @@ class GongzhonghaoAuthor1:
                     }
                     our_uid = getUser.create_uid(log_type, crawler, user_dict, env)
                     Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
+                    Common.logging(log_type, crawler, env, f'新创建的站内UID:{our_uid}')
                     if env == 'prod':
                         our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                     else:
@@ -96,8 +100,10 @@ class GongzhonghaoAuthor1:
                     Feishu.update_values(log_type, crawler, "Bzv72P", f'D{i + 1}:G{i + 1}', [
                         [user_info_dict["user_id"], user_info_dict["avatar_url"], our_uid, our_user_link]])
                     Common.logger(log_type, crawler).info(f'用户信息创建成功!\n')
+                    Common.logging(log_type, crawler, env, f'用户信息创建成功!\n')
                 else:
                     Common.logger(log_type, crawler).info("用户信息已存在\n")
+                    Common.logging(log_type, crawler, env, "用户信息已存在\n")
                 our_user_dict = {
                     'user_name': user_name,
                     'user_id': out_uid,
@@ -113,6 +119,7 @@ class GongzhonghaoAuthor1:
     @classmethod
     def get_user_info(cls, log_type, crawler, wechat_name, env):
         Common.logger(log_type, crawler).info(f"wechat_name:{wechat_name}")
+        Common.logging(log_type, crawler, env, f"wechat_name:{wechat_name}")
         while True:
             token_dict = cls.get_token(log_type, crawler, env)
             url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
@@ -148,22 +155,22 @@ class GongzhonghaoAuthor1:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
@@ -262,28 +269,29 @@ class GongzhonghaoAuthor1:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
                 continue
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                 time.sleep(60 * 10)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, "没有更多视频了\n")
                 return
             else:
                 begin += 5
@@ -313,13 +321,16 @@ class GongzhonghaoAuthor1:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
                             Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -328,12 +339,15 @@ class GongzhonghaoAuthor1:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -343,7 +357,9 @@ class GongzhonghaoAuthor1:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
     @classmethod
@@ -363,11 +379,13 @@ class GongzhonghaoAuthor1:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -375,11 +393,15 @@ class GongzhonghaoAuthor1:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -389,6 +411,7 @@ class GongzhonghaoAuthor1:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向榜爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -446,8 +469,10 @@ class GongzhonghaoAuthor1:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -470,31 +495,32 @@ class GongzhonghaoAuthor1:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
         user_list = cls.get_users(log_type, crawler, "Bzv72P", env)
         if user_list is None or len(user_list) == 0:
             Common.logger(log_type, crawler).warning(f"抓取用户列表为空\n")
+            Common.logging(log_type, crawler, env, f"抓取用户列表为空\n")
             return
         for user_dict in user_list:
             try:
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   rule_dict=rule_dict,
                                   user_dict=user_dict,
                                   env=env)
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
             except Exception as e:
                 Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
 
 
 if __name__ == "__main__":
-    # GongzhonghaoAuthor1.get_token("author", "gongzhonghao", "prod")
-    # print(GongzhonghaoAuthor1.get_users("author", "gongzhonghao", "Bzv72P", "dev"))
-    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
-    # print(title_like("author", "gongzhonghao", "公众号", "123", "dev"))
     print(GongzhonghaoAuthor1.get_user_info("author", "gongzhonghao", "幸福花朵", "dev"))
     pass

+ 46 - 24
gongzhonghao/gongzhonghao_author/gongzhonghao2_author.py

@@ -69,6 +69,7 @@ class GongzhonghaoAuthor2:
         }
         for k, v in our_user_dict.items():
             Common.logger(log_type, crawler).info(f"{k}:{v}")
+        Common.logging(log_type, crawler, env, f'our_user_dict:{our_user_dict}')
         return our_user_dict
 
     # 获取用户 fakeid
@@ -109,25 +110,25 @@ class GongzhonghaoAuthor2:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
 
             user_info_dict = {'user_name': r.json()["list"][0]["nickname"],
@@ -223,34 +224,35 @@ class GongzhonghaoAuthor2:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "invalid args" and r.json()["base_resp"]["ret"] == 200002:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"公众号:{user_dict['user_name']}\n抓取异常, 请检查该公众号\n")
                 return
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, '没有更多视频了\n')
                 return
             else:
                 begin += 5
@@ -280,14 +282,16 @@ class GongzhonghaoAuthor2:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f'video_dict:{video_dict}')
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
-                            Common.logger(log_type, crawler).info(
-                                f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -296,12 +300,15 @@ class GongzhonghaoAuthor2:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -311,8 +318,10 @@ class GongzhonghaoAuthor2:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
     @classmethod
@@ -332,11 +341,13 @@ class GongzhonghaoAuthor2:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -344,11 +355,15 @@ class GongzhonghaoAuthor2:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -358,6 +373,7 @@ class GongzhonghaoAuthor2:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -415,8 +431,10 @@ class GongzhonghaoAuthor2:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -439,6 +457,7 @@ class GongzhonghaoAuthor2:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
@@ -447,11 +466,13 @@ class GongzhonghaoAuthor2:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             len_sheet = len(user_sheet)
             if len_sheet <= 101:
                 Common.logger(log_type, crawler).info("抓取用户数<=100,无需启动第二套抓取脚本\n")
+                Common.logging(log_type, crawler, env, "抓取用户数<=100,无需启动第二套抓取脚本\n")
                 return
             if len_sheet >= 201:
                 len_sheet = 201
@@ -463,21 +484,22 @@ class GongzhonghaoAuthor2:
                                           i=i,
                                           env=env)
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                 try:
-                    # Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
                                       rule_dict=rule_dict,
                                       user_dict=user_dict,
                                       env=env)
                     Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                    Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                     time.sleep(60)
                 except Exception as e:
                     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                    Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
             break
 
 
 if __name__ == "__main__":
     GongzhonghaoAuthor2.get_token("author", "gongzhonghao", "dev")
-    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
     pass

+ 46 - 25
gongzhonghao/gongzhonghao_author/gongzhonghao3_author.py

@@ -68,6 +68,7 @@ class GongzhonghaoAuthor3:
         }
         for k, v in our_user_dict.items():
             Common.logger(log_type, crawler).info(f"{k}:{v}")
+        Common.logging(log_type, crawler, env, f"our_user_dict:{our_user_dict}")
         return our_user_dict
 
     # 获取用户 fakeid
@@ -108,25 +109,25 @@ class GongzhonghaoAuthor3:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
 
             user_info_dict = {'user_name': r.json()["list"][0]["nickname"],
@@ -166,7 +167,6 @@ class GongzhonghaoAuthor3:
                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v113/chromedriver'))
 
         driver.implicitly_wait(10)
-        # Common.logger(log_type, crawler).info('打开文章链接')
         driver.get(article_url)
         time.sleep(1)
 
@@ -224,34 +224,35 @@ class GongzhonghaoAuthor3:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "invalid args" and r.json()["base_resp"]["ret"] == 200002:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"公众号:{user_dict['user_name']}\n抓取异常, 请检查该公众号\n")
                 return
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, '没有更多视频了\n')
                 return
             else:
                 begin += 5
@@ -281,15 +282,17 @@ class GongzhonghaoAuthor3:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f'video_dict:{video_dict}')
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(
                                 rule_dict.get('period', {}).get('max', 1000)):
-                            Common.logger(log_type, crawler).info(
-                                f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -298,12 +301,15 @@ class GongzhonghaoAuthor3:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -313,8 +319,10 @@ class GongzhonghaoAuthor3:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
     @classmethod
@@ -334,11 +342,13 @@ class GongzhonghaoAuthor3:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -346,11 +356,15 @@ class GongzhonghaoAuthor3:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -360,6 +374,7 @@ class GongzhonghaoAuthor3:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -417,8 +432,10 @@ class GongzhonghaoAuthor3:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -441,6 +458,7 @@ class GongzhonghaoAuthor3:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
@@ -449,11 +467,13 @@ class GongzhonghaoAuthor3:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             len_sheet = len(user_sheet)
             if len_sheet <= 201:
                 Common.logger(log_type, crawler).info("抓取用户数<=200,无需启动第三套抓取脚本\n")
+                Common.logging(log_type, crawler, env, "抓取用户数<=200,无需启动第三套抓取脚本\n")
                 return
             if len_sheet >= 301:
                 len_sheet = 301
@@ -465,21 +485,22 @@ class GongzhonghaoAuthor3:
                                           i=i,
                                           env=env)
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                 try:
-                    # Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
                                       rule_dict=rule_dict,
                                       user_dict=user_dict,
                                       env=env)
                     Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                    Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                     time.sleep(60)
                 except Exception as e:
                     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                    Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
             break
 
 
 if __name__ == "__main__":
     GongzhonghaoAuthor3.get_token("author", "gongzhonghao", "dev")
-    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
     pass

+ 47 - 24
gongzhonghao/gongzhonghao_author/gongzhonghao4_author.py

@@ -69,12 +69,14 @@ class GongzhonghaoAuthor4:
         }
         for k, v in our_user_dict.items():
             Common.logger(log_type, crawler).info(f"{k}:{v}")
+        Common.logging(log_type, crawler, env, f'our_user_dict:{our_user_dict}')
         return our_user_dict
 
     # 获取用户 fakeid
     @classmethod
     def get_user_info(cls, log_type, crawler, wechat_name, env):
         Common.logger(log_type, crawler).info(f"获取站外用户信息:{wechat_name}")
+        Common.logging(log_type, crawler, env, f"获取站外用户信息:{wechat_name}")
         while True:
             token_dict = cls.get_token(log_type, crawler, env)
             url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
@@ -110,25 +112,25 @@ class GongzhonghaoAuthor4:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             user_info_dict = {'user_name': r.json()["list"][0]["nickname"],
                               'user_id': r.json()["list"][0]["fakeid"],
@@ -167,7 +169,6 @@ class GongzhonghaoAuthor4:
                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
 
         driver.implicitly_wait(10)
-        # Common.logger(log_type, crawler).info('打开文章链接')
         driver.get(article_url)
         time.sleep(1)
 
@@ -225,34 +226,35 @@ class GongzhonghaoAuthor4:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "invalid args" and r.json()["base_resp"]["ret"] == 200002:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"公众号:{user_dict['user_name']}\n抓取异常, 请检查该公众号\n")
                 return
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, '没有更多视频了\n')
                 return
             else:
                 begin += 5
@@ -282,13 +284,16 @@ class GongzhonghaoAuthor4:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f'video_dict:{video_dict}')
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
                             Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -297,12 +302,15 @@ class GongzhonghaoAuthor4:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -312,7 +320,9 @@ class GongzhonghaoAuthor4:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
 
@@ -333,11 +343,13 @@ class GongzhonghaoAuthor4:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -345,11 +357,15 @@ class GongzhonghaoAuthor4:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -359,6 +375,7 @@ class GongzhonghaoAuthor4:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -416,8 +433,10 @@ class GongzhonghaoAuthor4:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -440,6 +459,7 @@ class GongzhonghaoAuthor4:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
@@ -448,11 +468,13 @@ class GongzhonghaoAuthor4:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             len_sheet = len(user_sheet)
             if len_sheet <= 301:
                 Common.logger(log_type, crawler).info("抓取用户数<=300,无需启动第四套抓取脚本\n")
+                Common.logging(log_type, crawler, env, "抓取用户数<=300,无需启动第四套抓取脚本\n")
                 return
             if len_sheet >= 401:
                 len_sheet = 401
@@ -463,21 +485,22 @@ class GongzhonghaoAuthor4:
                                           sheetid=sheetid,
                                           i=i,
                                           env=env)
-                Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logger(log_type, crawler).info(f'获取:{user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取:{user_dict["user_name"]} 公众号视频\n')
                 try:
-                    Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
                                       rule_dict=rule_dict,
                                       user_dict=user_dict,
                                       env=env)
                     Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                    Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                     time.sleep(60)
                 except Exception as e:
                     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                    Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
             break
 
 if __name__ == "__main__":
     GongzhonghaoAuthor4.get_token("author", "gongzhonghao", "dev")
-    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
     pass

+ 47 - 23
gongzhonghao/gongzhonghao_author/gongzhonghao5_author.py

@@ -32,7 +32,6 @@ class GongzhonghaoAuthor5:
         select_sql = f""" select * from crawler_config where source="{crawler}" and title LIKE "%公众号_5%";"""
         configs = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
         if len(configs) == 0:
-            # Common.logger(log_type, crawler).warning(f"公众号_3未配置token")
             Feishu.bot(log_type, crawler, "公众号_5:未配置token")
             time.sleep(60)
             return None
@@ -70,12 +69,14 @@ class GongzhonghaoAuthor5:
         }
         for k, v in our_user_dict.items():
             Common.logger(log_type, crawler).info(f"{k}:{v}")
+        Common.logging(log_type, crawler, env, f"our_user_dict:{our_user_dict}")
         return our_user_dict
 
     # 获取用户 fakeid
     @classmethod
     def get_user_info(cls, log_type, crawler, wechat_name, env):
         Common.logger(log_type, crawler).info(f"获取站外用户信息:{wechat_name}")
+        Common.logging(log_type, crawler, env, f"获取站外用户信息:{wechat_name}")
         while True:
             token_dict = cls.get_token(log_type, crawler, env)
             url = "https://mp.weixin.qq.com/cgi-bin/searchbiz?"
@@ -111,25 +112,25 @@ class GongzhonghaoAuthor5:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_fakeid:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             user_info_dict = {'user_name': r.json()["list"][0]["nickname"],
                               'user_id': r.json()["list"][0]["fakeid"],
@@ -168,7 +169,6 @@ class GongzhonghaoAuthor5:
                 '/Users/wangkun/Downloads/chromedriver/chromedriver_v111/chromedriver'))
 
         driver.implicitly_wait(10)
-        # Common.logger(log_type, crawler).info('打开文章链接')
         driver.get(article_url)
         time.sleep(1)
 
@@ -226,34 +226,35 @@ class GongzhonghaoAuthor5:
             r = requests.get(url=url, headers=headers, params=params, verify=False)
             r.close()
             if r.json()["base_resp"]["err_msg"] == "invalid session":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "freq control":
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if r.json()["base_resp"]["err_msg"] == "invalid args" and r.json()["base_resp"]["ret"] == 200002:
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler,f"公众号:{user_dict['user_name']}\n抓取异常, 请检查该公众号\n")
                 return
             if 'app_msg_list' not in r.json():
-                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                Common.logger(log_type, crawler).warning(f"status_code:{r.status_code}, get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"status_code:{r.status_code}, get_videoList:{r.text}\n")
                 if 20 >= datetime.datetime.now().hour >= 10:
                     Feishu.bot(log_type, crawler, f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
+                time.sleep(60 * 15)
                 continue
             if len(r.json()['app_msg_list']) == 0:
                 Common.logger(log_type, crawler).info('没有更多视频了\n')
+                Common.logging(log_type, crawler, env, "没有更多视频了\n")
                 return
             else:
                 begin += 5
@@ -283,13 +284,16 @@ class GongzhonghaoAuthor5:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"video_dict:{video_dict}")
 
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
                             Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         if video_dict['article_url'] == 0 or video_dict['video_url'] == 0:
                             Common.logger(log_type, crawler).info("文章涉嫌违反相关法律法规和政策\n")
+                            Common.logging(log_type, crawler, env, "文章涉嫌违反相关法律法规和政策\n")
                         # 标题敏感词过滤
                         elif any(str(word) if str(word) in video_dict['video_title'] else False
                                  for word in get_config_from_mysql(log_type=log_type,
@@ -298,12 +302,15 @@ class GongzhonghaoAuthor5:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info("标题已中过滤词\n")
+                            Common.logging(log_type, crawler, env, "标题已中过滤词\n")
                         # 已下载判断
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info("视频已下载\n")
+                            Common.logging(log_type, crawler, env, "视频已下载\n")
                         # 标题相似度
                         elif title_like(log_type, crawler, video_dict['video_title'], cls.platform, env) is True:
                             Common.logger(log_type, crawler).info(f'标题相似度>=80%:{video_dict["video_title"]}\n')
+                            Common.logging(log_type, crawler, env, f'标题相似度>=80%:{video_dict["video_title"]}\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -313,7 +320,9 @@ class GongzhonghaoAuthor5:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f'抓取单条视频异常:{e}\n')
                 Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                 time.sleep(60)
 
 
@@ -334,11 +343,13 @@ class GongzhonghaoAuthor5:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 获取视频时长
         ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
@@ -346,11 +357,15 @@ class GongzhonghaoAuthor5:
         video_dict["video_height"] = ffmpeg_dict["height"]
         video_dict["duration"] = ffmpeg_dict["duration"]
         Common.logger(log_type, crawler).info(f'video_width:{video_dict["video_width"]}')
+        Common.logging(log_type, crawler, env, f'video_width:{video_dict["video_width"]}')
         Common.logger(log_type, crawler).info(f'video_height:{video_dict["video_height"]}')
+        Common.logging(log_type, crawler, env, f'video_height:{video_dict["video_height"]}')
         Common.logger(log_type, crawler).info(f'duration:{video_dict["duration"]}')
+        Common.logging(log_type, crawler, env, f'duration:{video_dict["duration"]}')
         if download_rule(log_type, crawler, video_dict, rule_dict) is False:
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("不满足抓取规则,删除成功\n")
+            Common.logging(log_type, crawler, env, "不满足抓取规则,删除成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover",
@@ -360,6 +375,7 @@ class GongzhonghaoAuthor5:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         strategy = "定向爬虫策略"
         if env == 'prod':
             oss_endpoint = "inner"
@@ -417,8 +433,10 @@ class GongzhonghaoAuthor5:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "47e39d", "ROWS", 1, 2)
@@ -441,6 +459,7 @@ class GongzhonghaoAuthor5:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "47e39d", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频下载/上传成功\n')
+        Common.logging(log_type, crawler, env, '视频下载/上传成功\n')
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, rule_dict, env):
@@ -449,11 +468,13 @@ class GongzhonghaoAuthor5:
             user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
             if user_sheet is None:
                 Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet}, 2秒后重试")
+                Common.logging(log_type, crawler, env, f"user_sheet:{user_sheet}, 2秒后重试")
                 time.sleep(2)
                 continue
             len_sheet = len(user_sheet)
             if len_sheet <= 401:
                 Common.logger(log_type, crawler).info("抓取用户数<=400,无需启动第五套抓取脚本\n")
+                Common.logging(log_type, crawler, env, "抓取用户数<=400,无需启动第五套抓取脚本\n")
                 return
             # if len_sheet >= 501:
             #     len_sheet = 501
@@ -465,21 +486,24 @@ class GongzhonghaoAuthor5:
                                           i=i,
                                           env=env)
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                 try:
                     Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                    Common.logging(log_type, crawler, env, f'获取 {user_dict["user_name"]} 公众号视频\n')
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
                                       rule_dict=rule_dict,
                                       user_dict=user_dict,
                                       env=env)
                     Common.logger(log_type, crawler).info('休眠 60 秒\n')
+                    Common.logging(log_type, crawler, env, '休眠 60 秒\n')
                     time.sleep(60)
                 except Exception as e:
                     Common.logger(log_type, crawler).info(f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
+                    Common.logging(log_type, crawler, env, f'抓取{user_dict["user_name"]}公众号时异常:{e}\n')
             break
 
 
 if __name__ == "__main__":
     GongzhonghaoAuthor5.get_token("author", "gongzhonghao", "dev")
-    # print(get_config_from_mysql("author", "gongzhonghao", "dev", "filter", action=""))
     pass

+ 21 - 1
gongzhonghao/gongzhonghao_main/run_gzh1_author.py

@@ -25,13 +25,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
     batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                          f'WaitSeconds:{wait_seconds}\n'
+                                          f'TopicName:{topic_name}\n'
+                                          f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -47,6 +51,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                      f"MessageId:{msg.message_id}\n"
+                                                      f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                      f"MessageTag:{msg.message_tag}\n"
+                                                      f"ConsumedTimes:{msg.consumed_times}\n"
+                                                      f"PublishTime:{msg.publish_time}\n"
+                                                      f"Body:{msg.message_body}\n"
+                                                      f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                      f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                      f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -54,21 +68,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 GongzhonghaoAuthor1.get_all_videos(log_type=log_type,
                                                     crawler=crawler,
                                                     rule_dict=rule_dict,
                                                     env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 21 - 1
gongzhonghao/gongzhonghao_main/run_gzh2_author.py

@@ -25,13 +25,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
     batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -47,6 +51,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -54,21 +68,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 GongzhonghaoAuthor2.get_all_videos(log_type=log_type,
                                                     crawler=crawler,
                                                     rule_dict=rule_dict,
                                                     env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 21 - 1
gongzhonghao/gongzhonghao_main/run_gzh3_author.py

@@ -25,13 +25,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
     batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -47,6 +51,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 Common.logger(log_type, crawler).info("回传 ack 消息")
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
@@ -56,21 +70,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 GongzhonghaoAuthor3.get_all_videos(log_type=log_type,
                                                     crawler=crawler,
                                                     rule_dict=rule_dict,
                                                     env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 21 - 1
gongzhonghao/gongzhonghao_main/run_gzh4_author.py

@@ -25,13 +25,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
     batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -47,6 +51,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -54,21 +68,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 GongzhonghaoAuthor4.get_all_videos(log_type=log_type,
                                                     crawler=crawler,
                                                     rule_dict=rule_dict,
                                                     env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 21 - 1
gongzhonghao/gongzhonghao_main/run_gzh5_author.py

@@ -25,13 +25,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
     batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -47,6 +51,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -54,21 +68,27 @@ def main(log_type, crawler, topic_name, group_id, env):
                 task_dict = task_fun_mq(msg.message_body)['task_dict']
                 rule_dict = task_fun_mq(msg.message_body)['rule_dict']
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}\n")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}\n")
                 Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 GongzhonghaoAuthor5.get_all_videos(log_type=log_type,
                                                     crawler=crawler,
                                                     rule_dict=rule_dict,
                                                     env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 26 - 0
gongzhonghao/gongzhonghao_main/run_gzh_author_dev.py

@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/20
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+# from gongzhonghao.gongzhonghao_author.gongzhonghao1_author import GongzhonghaoAuthor1
+from gongzhonghao.gongzhonghao_author.gongzhonghao2_author import GongzhonghaoAuthor2
+
+
+def gzh_main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info("开始抓取:公众号")
+    Common.logging(log_type, crawler, env, "开始抓取:公众号")
+    # GongzhonghaoAuthor1.get_all_videos(log_type=log_type,
+    GongzhonghaoAuthor2.get_all_videos(log_type=log_type,
+                                       crawler=crawler,
+                                       rule_dict={"period": {"max": 1, "min": 1}, "duration": {"max": 2700, "min": 20}},
+                                       env=env)
+    Common.del_logs(log_type, crawler)
+    Common.logger(log_type, crawler).info('抓取一轮结束\n')
+    Common.logging(log_type, crawler, env, '抓取一轮结束\n')
+
+
+if __name__ == "__main__":
+    gzh_main(log_type="author", crawler="gongzhonghao", env="dev")

+ 1 - 1
kuaishou/kuaishou_main/run_ks_author.py

@@ -18,7 +18,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
kuaishou/kuaishou_main/run_ks_recommend.py

@@ -19,7 +19,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_ssnnyfq_recommend.py

@@ -20,7 +20,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xiaoniangao/xiaoniangao_main/run_xng_author.py

@@ -18,7 +18,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xiaoniangao/xiaoniangao_main/run_xng_hour.py

@@ -20,7 +20,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xiaoniangao/xiaoniangao_main/run_xng_play.py

@@ -19,7 +19,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xigua/xigua_main/run_xg_author.py

@@ -18,7 +18,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xigua/xigua_main/run_xg_recommend.py

@@ -19,7 +19,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xigua/xigua_main/run_xg_search.py

@@ -18,7 +18,7 @@ def main(log_type, crawler, topic_name, group_id, env):
     # 长轮询时间3秒(最多可设置为30秒)。
     wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 10
+    batch = 1
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'

+ 1 - 1
xigua/xigua_recommend/xigua_recommend_scheduling.py

@@ -603,7 +603,7 @@ class XiguarecommendScheduling:
     @classmethod
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
         queryCount = 1
-        while True:
+        for i in range(100):
             Common.logger(log_type, crawler).info(f"正在抓取第{queryCount}页视频")
             Common.logging(log_type, crawler, env, f"正在抓取第{queryCount}页视频")
             try: