Merge branch 'wangkun'

wangkun, 2 years ago
Commit a8f3f383e3

BIN
.DS_Store


+ 3 - 2
README.MD

@@ -138,8 +138,9 @@ ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
 #### WeChat Index
 ```commandline
 Fetch off-site titles; crontab job, runs once daily at 12:00
-00 12 * * * nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup-hot-search.log 2>&1 &
-Fetch WeChat Index
+00 12 * * * cd /data5/piaoquan_crawler/ && nohup python -u weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py >>weixinzhishu/logs/nohup-hot-search.log 2>&1 &
+Fetch WeChat Index; crontab job, runs once each at 08:00 and 20:00 daily
+00 08,20 * * * cd /data5/piaoquan_crawler/ && /root/anaconda3/bin/python weixinzhishu/weixinzhishu_main/run_weixinzhishu_score.py >>weixinzhishu/logs/nohup-score.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_long.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_out.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_sort.log 2>&1 &
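Both rewritten entries prepend `cd /data5/piaoquan_crawler/ &&` so that the script path and the `>>weixinzhishu/logs/...` redirect resolve against the project root rather than cron's default working directory. A minimal Python sketch of the same launch pattern (the helper name and its use of `subprocess` are illustrative, not part of this repo):

```python
import os
import subprocess

def launch_detached(script, log, cwd="/data5/piaoquan_crawler/"):
    # Hypothetical helper mirroring `cd ... && nohup python3 -u ... >>log 2>&1 &`.
    os.makedirs(os.path.join(cwd, os.path.dirname(log)), exist_ok=True)
    with open(os.path.join(cwd, log), "ab") as fh:
        subprocess.Popen(
            ["python3", "-u", script],   # -u: unbuffered, so the log fills in real time
            cwd=cwd,                     # plays the role of the leading `cd`
            stdout=fh,
            stderr=subprocess.STDOUT,    # merge stderr into the log, like `2>&1`
            start_new_session=True,      # detach from the terminal, like nohup + &
        )

launch_detached(
    "weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py",
    "weixinzhishu/logs/nohup-hot-search.log",
)
```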

+ 15 - 11
common/common.py

@@ -133,17 +133,21 @@ class Common:
             # Video file name
             video_name = "video.mp4"
 
-            # Download the video
-            urllib3.disable_warnings()
-            # response = requests.get(video_url, stream=True, proxies=cls.tunnel_proxies(), verify=False)
-            response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
-            try:
-                with open(video_path + video_name, "wb") as f:
-                    for chunk in response.iter_content(chunk_size=10240):
-                        f.write(chunk)
-                cls.logger(log_type, crawler).info("==========Video download complete==========")
-            except Exception as e:
-                cls.logger(log_type, crawler).error(f"Video download failed: {e}\n")
+            for i in range(3):
+                try:
+                    # Download the video, retrying up to three times
+                    urllib3.disable_warnings()
+                    # response = requests.get(video_url, stream=True, proxies=cls.tunnel_proxies(), verify=False)
+                    response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
+
+                    with open(video_path + video_name, "wb") as f:
+                        for chunk in response.iter_content(chunk_size=10240):
+                            f.write(chunk)
+                    cls.logger(log_type, crawler).info("==========Video download complete==========")
+                    break
+                except Exception as e:
+                    cls.logger(log_type, crawler).error(f"Video download failed: {e}\n")
+                    time.sleep(1)
 
         # Download the audio
         elif text == "audio":
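The new loop wraps the whole request in the retry, sleeps a flat second between attempts, and `break`s on success. The same pattern as a standalone function, with two hedged deviations from the code above: a request timeout and `raise_for_status()`, neither of which the original sets:

```python
import time

import requests
import urllib3

def download_with_retries(video_url, dest_path, retries=3, proxies=None):
    # Sketch of the retry loop above; returns True once the file is fully written.
    urllib3.disable_warnings()
    for attempt in range(retries):
        try:
            response = requests.get(video_url, stream=True, proxies=proxies,
                                    verify=False, timeout=30)  # timeout is an addition
            response.raise_for_status()  # addition: treat HTTP 4xx/5xx as retryable
            with open(dest_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=10240):
                    f.write(chunk)
            return True
        except Exception as e:
            print(f"download failed (attempt {attempt + 1}/{retries}): {e}")
            time.sleep(1)
    return False
```

One caveat the sketch inherits from the original: a failed attempt can leave a partial file behind, which the next attempt simply truncates and overwrites via `open(..., "wb")`.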

+ 12 - 12
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -136,7 +136,7 @@ class GongzhonghaoFollow:
                     continue
                 if "list" not in r.json() or len(r.json()["list"]) == 0:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -253,14 +253,14 @@ class GongzhonghaoFollow:
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -334,8 +334,8 @@ class GongzhonghaoFollow:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")
 
@@ -496,18 +496,18 @@ class GongzhonghaoFollow:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":
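The substantive change in `get_all_videos` is moving the `try/except` from around the whole loop to around each iteration, so one failing official account no longer aborts the remaining ones. The pattern in isolation (function and variable names here are illustrative, not the module's API):

```python
import random
import time

def crawl_all(users, crawl_one):
    for user in users:
        try:
            crawl_one(user)
            # 5-10 min randomized sleep between accounts, matching the new
            # randint(60*5, 60*10) above, to stay under WeChat's frequency control.
            time.sleep(random.randint(60 * 5, 60 * 10))
        except Exception as e:
            # Before this change, the except wrapped the for-loop itself,
            # so the first failure ended the entire run.
            print(f"crawl failed for {user}: {e}")
```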

+ 12 - 12
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -242,21 +242,21 @@ class GongzhonghaoFollow2:
                 r = requests.get(url=url, headers=headers, params=params, verify=False)
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).info(f"response:{r.text}")
+                    Common.logger(log_type, crawler).info(f"get_videoList:{r.text}")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"token_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nToken expired; scan the QR code to replace it\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -330,8 +330,8 @@ class GongzhonghaoFollow2:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")
 
@@ -492,18 +492,18 @@ class GongzhonghaoFollow2:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":
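All three error branches (`invalid session`, `freq control`, missing `app_msg_list`) share one shape: log the raw response, page an operator via Feishu only between 10:00 and 20:00, then back off ten minutes before retrying. A sketch of that guard factored out (the `notify` callable stands in for `Feishu.bot`):

```python
import datetime
import time

def back_off_and_maybe_alert(notify, message):
    # Page humans only during working hours; overnight the loop just waits.
    if 10 <= datetime.datetime.now().hour <= 20:
        notify(message)
    time.sleep(60 * 10)  # ten-minute back-off before the next attempt
```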

+ 9 - 9
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py

@@ -330,8 +330,8 @@ class GongzhonghaoFollow3:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error("get_videoList exception: {}\n", e)
 
@@ -492,18 +492,18 @@ class GongzhonghaoFollow3:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":

+ 1 - 1
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -469,7 +469,7 @@ class XiaoniangaoHour:
         try:
             befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
             update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
-            select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} """
+            select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
             update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
             if len(update_video_list) == 0:
                 Common.logger(log_type, crawler).info("No hourly-ranking data to update\n")
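A note on the new query: `SELECT *` combined with `GROUP BY out_video_id` only executes when MySQL's `ONLY_FULL_GROUP_BY` mode is disabled, and the non-grouped columns then come from an arbitrary row of each group. A sketch of a standards-safe variant that pins each group to its latest crawl (assuming newest-row-wins is the intended semantics; table and column names are from the query above):

```python
update_time_stamp = 0  # stand-in for the timestamp computed above

# Ties on crawler_time_stamp within one out_video_id would still return
# multiple rows; add a unique column (e.g. the primary key) to break them.
select_sql = f"""
    SELECT t.*
    FROM crawler_xiaoniangao_hour AS t
    JOIN (
        SELECT out_video_id, MAX(crawler_time_stamp) AS max_ts
        FROM crawler_xiaoniangao_hour
        WHERE crawler_time_stamp >= {update_time_stamp}
        GROUP BY out_video_id
    ) AS latest
      ON t.out_video_id = latest.out_video_id
     AND t.crawler_time_stamp = latest.max_ts
"""
```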