wangkun 1 year ago
parent
commit
80019f759a

+ 3 - 3
benshanzhufu/benshanzhufu_main/run_bszf_recommend.py

@@ -18,9 +18,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -56,7 +56,7 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 BenshanzhufuRecommend.get_videoList(log_type=log_type,
                                                     crawler=crawler,
                                                     our_uid=our_uid,

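Note: the wait_seconds / batch change above recurs in every MQ-driven runner in this commit and only tunes the Aliyun MQ long-polling consumer; per the inline comments, the service caps wait_seconds at 30 and batch at 16. A minimal sketch of the consume loop these runners share, assuming the mq_http_sdk-style consumer returned by get_consumer() exposes consume_message(batch, wait_seconds) and that task parsing stays as in the runners:

    # Sketch only - consume_message(), err.type and err.req_id follow the
    # Aliyun mq_http_sdk conventions these runners already rely on.
    wait_seconds = 30   # long-poll window, capped at 30 s by the service
    batch = 10          # messages per pull, capped at 16
    while True:
        try:
            recv_msgs = consumer.consume_message(batch, wait_seconds)
            for msg in recv_msgs:
                ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
                ...  # parse task_dict / rule_dict from msg.message_body, then dispatch to the crawler
        except MQExceptionBase as err:
            if err.type == "MessageNotExist":   # topic empty, poll again
                continue
            time.sleep(2)
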
+ 4 - 4
douyin/douyin_main/run_dy_author.py

@@ -16,9 +16,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -66,8 +66,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 DouyinauthorScheduling.get_author_videos(log_type=log_type,
                                                          crawler=crawler,
                                                          rule_dict=rule_dict,

+ 4 - 4
douyin/douyin_main/run_dy_recommend.py

@@ -17,9 +17,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -71,8 +71,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 DouyinrecommendScheduling.get_videoList(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,

+ 1 - 1
ganggangdouchuan/ganggangdouchuan_main/run_ganggangdouchuan_recommend.py

@@ -11,7 +11,7 @@ from ganggangdouchuan.ganggangdouchuan_recommend.ganggangdouchuan_recommend impo
 
 def main(log_type, crawler, env):
     oss_endpoint = "out"
-    Common.logger(log_type, crawler).info('开始抓取 刚刚都传小程序\n')
+    Common.logger(log_type, crawler).info('开始抓取:刚刚都传小程序\n')
     GanggangdouchuanRecommend.start_wechat(log_type, crawler, oss_endpoint, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')

+ 1 - 1
jixiangxingfu/jixiangxingfu_main/run_jixiangxingfu_recommend.py

@@ -10,7 +10,7 @@ from jixiangxingfu.jixiangxingfu_recommend.jixiangxingfu_recommend import Jixian
 
 
 def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('开始抓取 吉祥幸福小程序\n')
+    Common.logger(log_type, crawler).info('开始抓取:吉祥幸福小程序\n')
     JixiangxingfuRecommend.start_wechat(log_type, crawler, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')

+ 4 - 4
kuaishou/kuaishou_main/run_ks_author.py

@@ -16,9 +16,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -66,8 +66,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 KuaishouauthorScheduling.get_author_videos(log_type=log_type,
                                                            crawler=crawler,
                                                            rule_dict=rule_dict,

+ 4 - 4
kuaishou/kuaishou_main/run_ks_recommend.py

@@ -17,9 +17,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -71,8 +71,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 KuaiShouRecommendScheduling.get_videoList(log_type=log_type,
                                                           crawler=crawler,
                                                           rule_dict=rule_dict,

+ 2 - 2
shipinhao/shipinhao_main/run_shipinhao_search.py

@@ -10,8 +10,8 @@ from shipinhao.shipinhao_search.shipinhao_search import ShipinhaoSearch
 
 
 def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('开始抓取 视频号 搜索策略\n')
-    Common.logging(log_type, crawler, env, '开始抓取 视频号 搜索策略\n')
+    Common.logger(log_type, crawler).info('开始抓取:视频号搜索\n')
+    Common.logging(log_type, crawler, env, '开始抓取:视频号搜索\n')
     ShipinhaoSearch.get_search_videos(log_type, crawler, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')

+ 3 - 3
suisuiniannianyingfuqi/suisuiniannianyingfuqi_main/run_ssnnyfq_recommend.py

@@ -18,9 +18,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -56,7 +56,7 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
                 Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
-                Common.logger(log_type, crawler).info('开始抓取 岁岁年年迎福气小程序\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
                 SuisuiniannianyingfuqiRecommendScheduling.get_videoList(log_type=log_type,
                                                                         crawler=crawler,
                                                                         our_uid=our_uid,

+ 18 - 0
xiaoniangao/xiaoniangao_author/xiaoniangao_author_scheduling.py

@@ -81,12 +81,15 @@ class XiaoniangaoAuthorScheduling:
             r = requests.post(url=url, headers=headers, json=json_text, proxies=proxies, verify=False)
             if 'data' not in r.text or r.status_code != 200:
                 Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
+                Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
                 return
             elif 'list' not in r.json()['data']:
                 Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}\n")
+                Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
                 return
             elif len(r.json()['data']['list']) == 0:
                 Common.logger(log_type, crawler).info(f"没有更多数据啦~\n")
+                Common.logging(log_type, crawler, env, f"没有更多数据啦~\n")
                 return
             else:
                 next_t = r.json()["data"]["next_t"]
@@ -135,16 +138,20 @@ class XiaoniangaoAuthorScheduling:
                         }
                         for k, v in video_dict.items():
                             Common.logger(log_type, crawler).info(f"{k}:{v}")
+                        Common.logging(log_type, crawler, env, f"{video_dict}")
                         if int(time.time()) - publish_time_stamp > 3600 * 24 * int(rule_dict.get('period', {}).get('max', 1000)):
                             Common.logger(log_type, crawler).info(f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
+                            Common.logging(log_type, crawler, env, f"发布时间超过{int(rule_dict.get('period', {}).get('max', 1000))}天\n")
                             return
 
                         # 过滤无效视频
                         if video_title == "" or video_dict["video_id"] == "" or video_dict["video_url"] == "":
                             Common.logger(log_type, crawler).info("无效视频\n")
+                            Common.logging(log_type, crawler, env, "无效视频\n")
                         # 抓取基础规则过滤
                         elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                             Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                            Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                         elif any(str(word) if str(word) in video_dict["video_title"] else False
                                  for word in get_config_from_mysql(log_type=log_type,
                                                                    source=crawler,
@@ -152,8 +159,10 @@ class XiaoniangaoAuthorScheduling:
                                                                    text="filter",
                                                                    action="")) is True:
                             Common.logger(log_type, crawler).info('已中过滤词\n')
+                            Common.logging(log_type, crawler, env, '已中过滤词\n')
                         elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                             Common.logger(log_type, crawler).info('视频已下载\n')
+                            Common.logging(log_type, crawler, env, '视频已下载\n')
                         else:
                             cls.download_publish(log_type=log_type,
                                                  crawler=crawler,
@@ -163,6 +172,7 @@ class XiaoniangaoAuthorScheduling:
                                                  env=env)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                        Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
     # 下载/上传
     @classmethod
@@ -175,11 +185,13 @@ class XiaoniangaoAuthorScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_dict["video_title"], url=video_dict["cover_url"])
@@ -188,6 +200,7 @@ class XiaoniangaoAuthorScheduling:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -245,8 +258,10 @@ class XiaoniangaoAuthorScheduling:
                                         {int(video_dict['video_width'])},
                                         {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "Wu0CeL", "ROWS", 1, 2)
@@ -273,6 +288,7 @@ class XiaoniangaoAuthorScheduling:
         time.sleep(1)
         Feishu.update_values(log_type, crawler, "Wu0CeL", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
+        Common.logging(log_type, crawler, env, '视频信息写入飞书成功\n')
 
     # 获取所有关注列表的用户视频
     @classmethod
@@ -280,6 +296,7 @@ class XiaoniangaoAuthorScheduling:
         for user_dict in user_list:
             try:
                 Common.logger(log_type, crawler).info(f"获取 {user_dict['nick_name']} 主页视频")
+                Common.logging(log_type, crawler, env, f"获取 {user_dict['nick_name']} 主页视频")
                 cls.get_videoList(log_type=log_type,
                                   crawler=crawler,
                                   rule_dict=rule_dict,
@@ -287,6 +304,7 @@ class XiaoniangaoAuthorScheduling:
                                   env=env)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取{user_dict['nick_name']}主页时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取{user_dict['nick_name']}主页时异常:{e}\n")
 
 
 if __name__ == "__main__":

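Note: most added lines in this file (and the scheduling files below) simply mirror each existing Common.logger(log_type, crawler).info/.warning/.error call with a Common.logging(log_type, crawler, env, msg) call. If that duplication keeps growing, a small wrapper could keep both sinks behind one call; log_both below is a hypothetical illustration (only Common.logging's call signature is taken from this commit):

    def log_both(log_type, crawler, env, msg, level="info"):
        """Hypothetical helper: emit msg to both logging sinks used in this commit."""
        getattr(Common.logger(log_type, crawler), level)(msg)  # existing file logger
        Common.logging(log_type, crawler, env, msg)            # aggregated logging added here
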
+ 37 - 7
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour_scheduling.py

@@ -111,15 +111,19 @@ class XiaoniangaoHourScheduling:
         r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
         if 'data' not in r.text or r.status_code != 200:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
             return
         elif "data" not in r.json():
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
             return
         elif "list" not in r.json()["data"]:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']}\n")
             return
         elif len(r.json()['data']['list']) == 0:
             Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
+            Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']['list']}\n")
             return
         else:
             # 视频列表数据
@@ -168,13 +172,15 @@ class XiaoniangaoHourScheduling:
                     }
                     for k, v in video_dict.items():
                         Common.logger(log_type, crawler).info(f"{k}:{v}")
-
+                    Common.logging(log_type, crawler, env, f"{video_dict}")
                     # 过滤无效视频
                     if video_title == "" or video_dict["video_id"] == "" or video_dict["video_url"] == "":
                         Common.logger(log_type, crawler).warning("无效视频\n")
+                        Common.logging(log_type, crawler, env, "无效视频\n")
                     # 抓取基础规则过滤
                     elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                         Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                        Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                     elif any(str(word) if str(word) in video_dict["video_title"] else False
                              for word in get_config_from_mysql(log_type=log_type,
                                                                source=crawler,
@@ -182,8 +188,10 @@ class XiaoniangaoHourScheduling:
                                                                text="filter",
                                                                action="")) is True:
                         Common.logger(log_type, crawler).info('已中过滤词\n')
+                        Common.logging(log_type, crawler, env, '已中过滤词\n')
                     elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                         Common.logger(log_type, crawler).info('视频已下载\n')
+                        Common.logging(log_type, crawler, env, '视频已下载\n')
                     else:
                         # 写入飞书小时级feeds数据库表
                         insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
@@ -214,10 +222,13 @@ class XiaoniangaoHourScheduling:
                         "{time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))}"
                         )"""
                         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
                         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
                         Common.logger(log_type, crawler).info('视频信息写入小时级数据库成功!\n')
+                        Common.logging(log_type, crawler, env, '视频信息写入小时级数据库成功!\n')
                 except Exception as e:
                     Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                    Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
 
     @classmethod
     def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
@@ -326,6 +337,7 @@ class XiaoniangaoHourScheduling:
         update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
         if len(update_video_list) == 0:
             Common.logger(log_type, crawler).info("暂无需要更新的小时榜数据\n")
+            Common.logging(log_type, crawler, env, "暂无需要更新的小时榜数据\n")
             return
         for update_video_info in update_video_list:
             try:
@@ -342,6 +354,7 @@ class XiaoniangaoHourScheduling:
                                                          v_id=video_id)
                     ten_play_cnt = video_info_dict['play_cnt']
                     Common.logger(log_type, crawler).info(f"ten_play_cnt:{ten_play_cnt}")
+                    Common.logging(log_type, crawler, env, f"ten_play_cnt:{ten_play_cnt}")
                     update_sql = f""" update crawler_xiaoniangao_hour set ten_play_cnt={ten_play_cnt} WHERE out_video_id="{video_id}"; """
                     # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                     MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -361,6 +374,7 @@ class XiaoniangaoHourScheduling:
                                                          v_id=video_id)
                     fifteen_play_cnt = video_info_dict['play_cnt']
                     Common.logger(log_type, crawler).info(f"fifteen_play_cnt:{fifteen_play_cnt}")
+                    Common.logging(log_type, crawler, env, f"fifteen_play_cnt:{fifteen_play_cnt}")
                     update_sql = f""" update crawler_xiaoniangao_hour set fifteen_play_cnt={fifteen_play_cnt} WHERE out_video_id="{video_id}"; """
                     # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                     MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -380,6 +394,7 @@ class XiaoniangaoHourScheduling:
                                                          v_id=video_id)
                     twenty_play_cnt = video_info_dict['play_cnt']
                     Common.logger(log_type, crawler).info(f"twenty_play_cnt:{twenty_play_cnt}")
+                    Common.logging(log_type, crawler, env, f"twenty_play_cnt:{twenty_play_cnt}")
                     update_sql = f""" update crawler_xiaoniangao_hour set twenty_play_cnt={twenty_play_cnt} WHERE out_video_id="{video_id}"; """
                     # Common.logger(log_type, crawler).info(f"update_sql:{update_sql}")
                     MysqlHelper.update_values(log_type, crawler, update_sql, env)
@@ -394,6 +409,7 @@ class XiaoniangaoHourScheduling:
                     pass
             except Exception as e:
                 Common.logger(log_type, crawler).error(f'更新{update_video_info["video_title"]}时异常:{e}\n')
+                Common.logging(log_type, crawler, env, f'更新{update_video_info["video_title"]}时异常:{e}\n')
 
     @classmethod
     def download(cls, log_type, crawler, video_info_dict, rule_dict, our_uid, env):
@@ -406,11 +422,13 @@ class XiaoniangaoHourScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_info_dict["video_title"],
@@ -420,6 +438,7 @@ class XiaoniangaoHourScheduling:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -477,8 +496,10 @@ class XiaoniangaoHourScheduling:
                                                     {int(video_info_dict['video_width'])},
                                                     {int(video_info_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息插入数据库成功!')
+        Common.logging(log_type, crawler, env, '视频信息插入数据库成功!')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "yatRv2", "ROWS", 1, 2)
@@ -505,16 +526,18 @@ class XiaoniangaoHourScheduling:
         time.sleep(1)
         Feishu.update_values(log_type, crawler, "yatRv2", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
+        Common.logging(log_type, crawler, env, '视频信息写入飞书成功\n')
 
     # 下载/上传
     @classmethod
     def download_publish(cls, log_type, crawler, video_info_dict, rule_dict, update_video_info, our_uid, env):
         if cls.repeat_video(log_type, crawler, video_info_dict["video_id"], env) != 0:
             Common.logger(log_type, crawler).info('视频已下载\n')
+            Common.logging(log_type, crawler, env, '视频已下载\n')
         # 播放量大于 50000,直接下载
         elif int(video_info_dict["play_cnt"]) >= 30000:
-            Common.logger(log_type, crawler).info(
-                f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
+            Common.logger(log_type, crawler).info(f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
+            Common.logging(log_type, crawler, env, f"播放量:{video_info_dict['play_cnt']} >= 30000,满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -525,9 +548,10 @@ class XiaoniangaoHourScheduling:
         # 上升榜判断逻辑,任意时间段上升量>=5000,连续两个时间段上升量>=2000
         elif int(update_video_info['ten_play_cnt']) >= 3000 or int(
                 update_video_info['fifteen_play_cnt']) >= 3000 or int(update_video_info['twenty_play_cnt']) >= 3000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
+            Common.logger(log_type, crawler).info(f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
+            Common.logging(log_type, crawler, env, f"10:00 or 15:00 or 20:00 数据上升量:{int(update_video_info['ten_play_cnt'])} or {int(update_video_info['fifteen_play_cnt'])} or {int(update_video_info['twenty_play_cnt'])} >= 3000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
+            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -536,9 +560,10 @@ class XiaoniangaoHourScheduling:
                          env=env)
 
         elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['fifteen_play_cnt']) >= 1000:
-            Common.logger(log_type, crawler).info(
-                f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
+            Common.logger(log_type, crawler).info(f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"10:00 and 15:00 数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['fifteen_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
+            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -549,7 +574,9 @@ class XiaoniangaoHourScheduling:
         elif int(update_video_info['fifteen_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
                 f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"15:00 and 20:00 数据上升量:{int(update_video_info['fifteen_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
+            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -560,7 +587,9 @@ class XiaoniangaoHourScheduling:
         elif int(update_video_info['ten_play_cnt']) >= 1000 and int(update_video_info['twenty_play_cnt']) >= 1000:
             Common.logger(log_type, crawler).info(
                 f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
+            Common.logging(log_type, crawler, env, f"今日10:00 / 20:00数据上升量:{int(update_video_info['ten_play_cnt'])} and {int(update_video_info['twenty_play_cnt'])} >= 1000")
             Common.logger(log_type, crawler).info("满足下载规则,开始下载视频")
+            Common.logging(log_type, crawler, env, "满足下载规则,开始下载视频")
             cls.download(log_type=log_type,
                          crawler=crawler,
                          video_info_dict=video_info_dict,
@@ -570,6 +599,7 @@ class XiaoniangaoHourScheduling:
 
         else:
             Common.logger(log_type, crawler).info("上升量不满足下载规则")
+            Common.logging(log_type, crawler, env, "上升量不满足下载规则")
 
 
 if __name__ == "__main__":

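Note: the thresholds exercised in download_publish above reduce to three checks: absolute play count >= 30000, any single hourly rise (10:00 / 15:00 / 20:00) >= 3000, or any two of those windows both >= 1000 (the 5000/2000 figures in the older comment no longer match the code). A hypothetical condensed helper, using exactly the values from the code, for reference:

    def should_download(play_cnt, ten, fifteen, twenty):
        """Illustration only: condensed restatement of download_publish's thresholds."""
        if int(play_cnt) >= 30000:                                    # absolute play-count rule
            return True
        if ten >= 3000 or fifteen >= 3000 or twenty >= 3000:          # single-window rise
            return True
        pairs = [(ten, fifteen), (fifteen, twenty), (ten, twenty)]
        return any(a >= 1000 and b >= 1000 for a, b in pairs)         # paired-window rise
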
+ 24 - 3
xiaoniangao/xiaoniangao_main/run_xng_author.py

@@ -16,13 +16,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -38,6 +42,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -48,9 +62,13 @@ def main(log_type, crawler, topic_name, group_id, env):
                 select_user_sql = f"""select * from crawler_user_v3 where task_id={task_id}"""
                 user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
                 Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 Common.logger(log_type, crawler).info(f"用户列表:\n{user_list}")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f"用户列表:\n{user_list}")
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 XiaoniangaoAuthorScheduling.get_author_videos(log_type=log_type,
                                                               crawler=crawler,
                                                               user_list=user_list,
@@ -58,14 +76,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                               env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
 
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 28 - 3
xiaoniangao/xiaoniangao_main/run_xng_hour.py

@@ -18,13 +18,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -40,6 +44,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -54,18 +68,24 @@ def main(log_type, crawler, topic_name, group_id, env):
                     our_uid_list.append(user["uid"])
                 our_uid = random.choice(our_uid_list)
                 Common.logger(log_type, crawler).info(f"调度任务:\n{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:\n{rule_dict}")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 # 获取符合规则的视频,写入小时级数据_feeds
                 for i in range(1, 101):
                     try:
                         Common.logger(log_type, crawler).info(f"正在抓取第{i}页")
+                        Common.logging(log_type, crawler, env, f"正在抓取第{i}页")
                         XiaoniangaoHourScheduling.get_videoList(log_type, crawler, rule_dict, env)
                     except Exception as err:
                         Common.logger(log_type, crawler).info(f"抓取第{i}页时异常:{err}\n")
+                        Common.logging(log_type, crawler, env, f"抓取第{i}页时异常:{err}\n")
                 now = datetime.datetime.now()
                 if now.hour == 10 and 0 <= now.minute <= 10:
                     Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+                    Common.logging(log_type, crawler, env, "开始更新/下载上升榜")
                     XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
                                                                crawler=crawler,
                                                                rule_dict=rule_dict,
@@ -74,6 +94,7 @@ def main(log_type, crawler, topic_name, group_id, env):
 
                 elif now.hour == 15 and now.minute <= 10:
                     Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+                    Common.logging(log_type, crawler, env, "开始更新/下载上升榜")
                     XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
                                                                crawler=crawler,
                                                                rule_dict=rule_dict,
@@ -82,6 +103,7 @@ def main(log_type, crawler, topic_name, group_id, env):
 
                 elif now.hour == 20 and now.minute <= 10:
                     Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+                    Common.logging(log_type, crawler, env, "开始更新/下载上升榜")
                     XiaoniangaoHourScheduling.update_videoList(log_type=log_type,
                                                                crawler=crawler,
                                                                rule_dict=rule_dict,
@@ -89,14 +111,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                                env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
 
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 23 - 3
xiaoniangao/xiaoniangao_main/run_xng_play.py

@@ -17,13 +17,17 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
                                           f'MQConsumer:{group_id}')
+    Common.logging(log_type, crawler, env, f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
+                                           f'WaitSeconds:{wait_seconds}\n'
+                                           f'TopicName:{topic_name}\n'
+                                           f'MQConsumer:{group_id}')
     while True:
         try:
             # 长轮询消费消息。
@@ -39,6 +43,16 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
                                                       f"Properties:{msg.properties}")
+                Common.logging(log_type, crawler, env, f"Receive\n"
+                                                       f"MessageId:{msg.message_id}\n"
+                                                       f"MessageBodyMD5:{msg.message_body_md5}\n"
+                                                       f"MessageTag:{msg.message_tag}\n"
+                                                       f"ConsumedTimes:{msg.consumed_times}\n"
+                                                       f"PublishTime:{msg.publish_time}\n"
+                                                       f"Body:{msg.message_body}\n"
+                                                       f"NextConsumeTime:{msg.next_consume_time}\n"
+                                                       f"ReceiptHandle:{msg.receipt_handle}\n"
+                                                       f"Properties:{msg.properties}")
                 # ack_mq_message
                 ack_message(log_type=log_type, crawler=crawler, recv_msgs=recv_msgs, consumer=consumer)
 
@@ -53,9 +67,12 @@ def main(log_type, crawler, topic_name, group_id, env):
                     our_uid_list.append(user["uid"])
                 our_uid = random.choice(our_uid_list)
                 Common.logger(log_type, crawler).info(f"调度任务:{task_dict}")
+                Common.logging(log_type, crawler, env, f"调度任务:{task_dict}")
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
+                Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 XiaoniangaoplayScheduling.get_videoList(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,
@@ -63,14 +80,17 @@ def main(log_type, crawler, topic_name, group_id, env):
                                                         env=env)
                 Common.del_logs(log_type, crawler)
                 Common.logger(log_type, crawler).info('抓取一轮结束\n')
+                Common.logging(log_type, crawler, env, '抓取一轮结束\n')
 
         except MQExceptionBase as err:
             # Topic中没有消息可消费。
             if err.type == "MessageNotExist":
                 Common.logger(log_type, crawler).info(f"No new message! RequestId:{err.req_id}\n")
+                Common.logging(log_type, crawler, env, f"No new message! RequestId:{err.req_id}\n")
                 continue
 
             Common.logger(log_type, crawler).info(f"Consume Message Fail! Exception:{err}\n")
+            Common.logging(log_type, crawler, env, f"Consume Message Fail! Exception:{err}\n")
             time.sleep(2)
             continue
 

+ 24 - 0
xiaoniangao/xiaoniangao_main/run_xng_play_dev.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/14
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from xiaoniangao.xiaoniangao_play.xiaoniangao_play_scheduling import XiaoniangaoplayScheduling
+
+
+def main(log_type, crawler, env):
+    Common.logger(log_type, crawler).info(f'开始抓取 西瓜推荐\n')
+    Common.logging(log_type, crawler, env, "开始抓取 西瓜推荐\n")
+    XiaoniangaoplayScheduling.get_videoList(log_type=log_type,
+                                            crawler=crawler,
+                                            rule_dict={"duration":{"min":40,"max":0},"play_cnt":{"min":20000,"max":0},"period":{"min":60,"max":60}},
+                                            our_uid=6267140,
+                                            env=env)
+    Common.logger(log_type, crawler).info("抓取一轮结束\n")
+    Common.logging(log_type, crawler, env, "抓取一轮结束\n")
+
+
+if __name__ == "__main__":
+    main("recommend", "xiaoniangao", "dev")

+ 18 - 0
xiaoniangao/xiaoniangao_play/xiaoniangao_play_scheduling.py

@@ -33,6 +33,7 @@ class XiaoniangaoplayScheduling:
         for page in range(1, 101):
             try:
                 Common.logger(log_type, crawler).info(f"正在抓取第{page}页")
+                Common.logging(log_type, crawler, env, f"正在抓取第{page}页")
                 url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
                 headers = {
                     "x-b3-traceid": '1dc0a6d0929a2b',
@@ -102,15 +103,19 @@ class XiaoniangaoplayScheduling:
                 r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
                 if "data" not in r.text or r.status_code != 200:
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+                    Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
                     return
                 elif "data" not in r.json():
                     Common.logger(log_type, crawler).info(f"get_videoList:{r.json()}\n")
+                    Common.logging(log_type, crawler, env, f"get_videoList:{r.json()}\n")
                     return
                 elif "list" not in r.json()["data"]:
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
+                    Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']}\n")
                     return
                 elif len(r.json()["data"]["list"]) == 0:
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
+                    Common.logging(log_type, crawler, env, f"get_videoList:{r.json()['data']['list']}\n")
                     return
                 else:
                     # 视频列表数据
@@ -160,13 +165,16 @@ class XiaoniangaoplayScheduling:
                             }
                             for k, v in video_dict.items():
                                 Common.logger(log_type, crawler).info(f"{k}:{v}")
+                            Common.logging(log_type, crawler, env, f"{video_dict}")
 
                             # 过滤无效视频
                             if video_title == "" or video_dict["video_id"] == "" or video_dict["video_url"] == "":
                                 Common.logger(log_type, crawler).warning("无效视频\n")
+                                Common.logging(log_type, crawler, env, "无效视频\n")
                             # 抓取基础规则过滤
                             elif download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                                 Common.logger(log_type, crawler).info("不满足抓取规则\n")
+                                Common.logging(log_type, crawler, env, "不满足抓取规则\n")
                             elif any(str(word) if str(word) in video_dict["video_title"] else False
                                      for word in get_config_from_mysql(log_type=log_type,
                                                                        source=crawler,
@@ -174,8 +182,10 @@ class XiaoniangaoplayScheduling:
                                                                        text="filter",
                                                                        action="")) is True:
                                 Common.logger(log_type, crawler).info('已中过滤词\n')
+                                Common.logging(log_type, crawler, env, '已中过滤词\n')
                             elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
                                 Common.logger(log_type, crawler).info('视频已下载\n')
+                                Common.logging(log_type, crawler, env, '视频已下载\n')
                             else:
                                 cls.download_publish(log_type=log_type,
                                                      crawler=crawler,
@@ -185,8 +195,10 @@ class XiaoniangaoplayScheduling:
                                                      env=env)
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
+                            Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取第{page}页时异常:{e}\n")
+                Common.logging(log_type, crawler, env, f"抓取第{page}页时异常:{e}\n")
 
     @classmethod
     def repeat_video(cls, log_type, crawler, video_id, env):
@@ -204,11 +216,13 @@ class XiaoniangaoplayScheduling:
                 # 删除视频文件夹
                 shutil.rmtree(f"./{crawler}/videos/{md_title}")
                 Common.logger(log_type, crawler).info("视频size=0,删除成功\n")
+                Common.logging(log_type, crawler, env, "视频size=0,删除成功\n")
                 return
         except FileNotFoundError:
             # 删除视频文件夹
             shutil.rmtree(f"./{crawler}/videos/{md_title}")
             Common.logger(log_type, crawler).info("视频文件不存在,删除文件夹成功\n")
+            Common.logging(log_type, crawler, env, "视频文件不存在,删除文件夹成功\n")
             return
         # 下载封面
         Common.download_method(log_type=log_type, crawler=crawler, text="cover", title=video_dict["video_title"], url=video_dict["cover_url"])
@@ -217,6 +231,7 @@ class XiaoniangaoplayScheduling:
 
         # 上传视频
         Common.logger(log_type, crawler).info("开始上传视频...")
+        Common.logging(log_type, crawler, env, "开始上传视频...")
         if env == "dev":
             oss_endpoint = "out"
             our_video_id = Publish.upload_and_publish(log_type=log_type,
@@ -274,8 +289,10 @@ class XiaoniangaoplayScheduling:
                                                     {int(video_dict['video_width'])},
                                                     {int(video_dict['video_height'])}) """
         Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+        Common.logging(log_type, crawler, env, f"insert_sql:{insert_sql}")
         MysqlHelper.update_values(log_type, crawler, insert_sql, env)
         Common.logger(log_type, crawler).info('视频信息写入数据库成功')
+        Common.logging(log_type, crawler, env, '视频信息写入数据库成功')
 
         # 视频写入飞书
         Feishu.insert_columns(log_type, crawler, "c85k1C", "ROWS", 1, 2)
@@ -302,6 +319,7 @@ class XiaoniangaoplayScheduling:
         time.sleep(0.5)
         Feishu.update_values(log_type, crawler, "c85k1C", "F2:Z2", values)
         Common.logger(log_type, crawler).info('视频信息写入飞书成功\n')
+        Common.logging(log_type, crawler, env, '视频信息写入飞书成功\n')
 
 
 if __name__ == '__main__':

+ 4 - 4
xigua/xigua_main/run_xg_author.py

@@ -16,9 +16,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -66,8 +66,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 XiguaauthorScheduling.get_author_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,

+ 4 - 4
xigua/xigua_main/run_xg_recommend.py

@@ -17,9 +17,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -71,8 +71,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 XiguarecommendScheduling.get_videoList(log_type=log_type,
                                                        crawler=crawler,
                                                        rule_dict=rule_dict,

+ 4 - 4
xigua/xigua_main/run_xg_search.py

@@ -16,9 +16,9 @@ def main(log_type, crawler, topic_name, group_id, env):
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。
-    wait_seconds = 3
+    wait_seconds = 30
     # 一次最多消费3条(最多可设置为16条)。
-    batch = 1
+    batch = 10
     Common.logger(log_type, crawler).info(f'{10 * "="}Consume And Ack Message From Topic{10 * "="}\n'
                                           f'WaitSeconds:{wait_seconds}\n'
                                           f'TopicName:{topic_name}\n'
@@ -66,8 +66,8 @@ def main(log_type, crawler, topic_name, group_id, env):
                 Common.logger(log_type, crawler).info(f"抓取规则:{rule_dict}")
                 Common.logging(log_type, crawler, env, f"抓取规则:{rule_dict}")
                 # Common.logger(log_type, crawler).info(f"用户列表:{user_list}\n")
-                Common.logger(log_type, crawler).info(f'开始抓取 {task_dict["taskName"]}\n')
-                Common.logging(log_type, crawler, env, f'开始抓取 {task_dict["taskName"]}\n')
+                Common.logger(log_type, crawler).info(f'开始抓取:{task_dict["taskName"]}\n')
+                Common.logging(log_type, crawler, env, f'开始抓取:{task_dict["taskName"]}\n')
                 XiguasearchScheduling.get_search_videos(log_type=log_type,
                                                         crawler=crawler,
                                                         rule_dict=rule_dict,

+ 1 - 1
zhiqingtiantiankan/zhiqingtiantiankan_main/run_zhiqingtiantiankan_recommend.py

@@ -10,7 +10,7 @@ from zhiqingtiantiankan.zhiqingtiantiankan_recommend.zhiqingtiantiankan_recommen
 
 
 def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('开始抓取 知青天天看小程序\n')
+    Common.logger(log_type, crawler).info('开始抓取:知青天天看小程序\n')
     ZhiqingtiantiankanRecommend.start_wechat(log_type, crawler, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')

+ 1 - 1
zhongmiaoyinxin/zhongmiaoyinxin_main/run_zhongmiaoyinxin_recommend.py

@@ -10,7 +10,7 @@ from zhongmiaoyinxin.zhongmiaoyinxin_recommend.zhongmiaoyinxin_recommend import
 
 
 def main(log_type, crawler, env):
-    Common.logger(log_type, crawler).info('开始抓取 众妙音信小程序\n')
+    Common.logger(log_type, crawler).info('开始抓取:众妙音信小程序\n')
     ZhongmiaoyinxinRecommend.start_wechat(log_type, crawler, env)
     Common.del_logs(log_type, crawler)
     Common.logger(log_type, crawler).info('抓取完一轮\n')