Merge branch 'wangkun'

wangkun, 2 years ago
Commit a8f3f383e3

BIN
.DS_Store


+ 3 - 2
README.MD

@@ -138,8 +138,9 @@ ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
 #### WeChat Index
 ```commandline
 Fetch off-site titles; crontab job, runs once daily at 12:00
-00 12 * * * nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup-hot-search.log 2>&1 &
-Fetch WeChat Index
+00 12 * * * cd /data5/piaoquan_crawler/ && nohup python -u weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py >>weixinzhishu/logs/nohup-hot-search.log 2>&1 &
+Fetch WeChat Index; crontab job, runs once each at 08:00 and 20:00 daily
+00 08,20 * * * cd /data5/piaoquan_crawler/ && /root/anaconda3/bin/python weixinzhishu/weixinzhishu_main/run_weixinzhishu_score.py >>weixinzhishu/logs/nohup-score.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_long.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_out.log 2>&1 &
 nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_sort.log 2>&1 &
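Both rewritten entries prepend `cd /data5/piaoquan_crawler/ &&` so that the script path and the `>>weixinzhishu/logs/...` redirect resolve against the project root rather than cron's default working directory. A minimal Python sketch of the same launch pattern (the helper name and its use of `subprocess` are illustrative, not part of this repo):

```python
import os
import subprocess

def launch_detached(script, log, cwd="/data5/piaoquan_crawler/"):
    # Hypothetical helper mirroring `cd ... && nohup python3 -u ... >>log 2>&1 &`.
    os.makedirs(os.path.join(cwd, os.path.dirname(log)), exist_ok=True)
    with open(os.path.join(cwd, log), "ab") as fh:
        subprocess.Popen(
            ["python3", "-u", script],   # -u: unbuffered, so the log fills in real time
            cwd=cwd,                     # plays the role of the leading `cd`
            stdout=fh,
            stderr=subprocess.STDOUT,    # merge stderr into the log, like `2>&1`
            start_new_session=True,      # detach from the terminal, like nohup + &
        )

launch_detached(
    "weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py",
    "weixinzhishu/logs/nohup-hot-search.log",
)
```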

+ 15 - 11
common/common.py

@@ -133,17 +133,21 @@ class Common:
             # Video file name
             video_name = "video.mp4"
 
-            # Download the video
-            urllib3.disable_warnings()
-            # response = requests.get(video_url, stream=True, proxies=cls.tunnel_proxies(), verify=False)
-            response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
-            try:
-                with open(video_path + video_name, "wb") as f:
-                    for chunk in response.iter_content(chunk_size=10240):
-                        f.write(chunk)
-                cls.logger(log_type, crawler).info("==========Video download complete==========")
-            except Exception as e:
-                cls.logger(log_type, crawler).error(f"Video download failed: {e}\n")
+            for i in range(3):
+                try:
+                    # Download the video, retrying up to three times
+                    urllib3.disable_warnings()
+                    # response = requests.get(video_url, stream=True, proxies=cls.tunnel_proxies(), verify=False)
+                    response = requests.get(video_url, stream=True, proxies=proxies, verify=False)
+
+                    with open(video_path + video_name, "wb") as f:
+                        for chunk in response.iter_content(chunk_size=10240):
+                            f.write(chunk)
+                    cls.logger(log_type, crawler).info("==========Video download complete==========")
+                    break
+                except Exception as e:
+                    cls.logger(log_type, crawler).error(f"Video download failed: {e}\n")
+                    time.sleep(1)
 
         # Download the audio
         elif text == "audio":
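The new loop wraps the whole request in the retry, sleeps a flat second between attempts, and `break`s on success. The same pattern as a standalone function, with two hedged deviations from the code above: a request timeout and `raise_for_status()`, neither of which the original sets:

```python
import time

import requests
import urllib3

def download_with_retries(video_url, dest_path, retries=3, proxies=None):
    # Sketch of the retry loop above; returns True once the file is fully written.
    urllib3.disable_warnings()
    for attempt in range(retries):
        try:
            response = requests.get(video_url, stream=True, proxies=proxies,
                                    verify=False, timeout=30)  # timeout is an addition
            response.raise_for_status()  # addition: treat HTTP 4xx/5xx as retryable
            with open(dest_path, "wb") as f:
                for chunk in response.iter_content(chunk_size=10240):
                    f.write(chunk)
            return True
        except Exception as e:
            print(f"download failed (attempt {attempt + 1}/{retries}): {e}")
            time.sleep(1)
    return False
```

One caveat the sketch inherits from the original: a failed attempt can leave a partial file behind, which the next attempt simply truncates and overwrites via `open(..., "wb")`.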

+ 12 - 12
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -136,7 +136,7 @@ class GongzhonghaoFollow:
                     continue
                 if "list" not in r.json() or len(r.json()["list"]) == 0:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -253,14 +253,14 @@ class GongzhonghaoFollow:
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_1: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -334,8 +334,8 @@ class GongzhonghaoFollow:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")
 
@@ -496,18 +496,18 @@ class GongzhonghaoFollow:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":
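The substantive change in `get_all_videos` is moving the `try/except` from around the whole loop to around each iteration, so one failing official account no longer aborts the remaining ones. The pattern in isolation (function and variable names here are illustrative, not the module's API):

```python
import random
import time

def crawl_all(users, crawl_one):
    for user in users:
        try:
            crawl_one(user)
            # 5-10 min randomized sleep between accounts, matching the new
            # randint(60*5, 60*10) above, to stay under WeChat's frequency control.
            time.sleep(random.randint(60 * 5, 60 * 10))
        except Exception as e:
            # Before this change, the except wrapped the for-loop itself,
            # so the first failure ended the entire run.
            print(f"crawl failed for {user}: {e}")
```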

+ 12 - 12
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -242,21 +242,21 @@ class GongzhonghaoFollow2:
                 r = requests.get(url=url, headers=headers, params=params, verify=False)
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).info(f"response:{r.text}")
+                    Common.logger(log_type, crawler).info(f"get_videoList:{r.text}")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"token_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nToken expired; scan the QR code to replace it\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
                         Feishu.bot(log_type, crawler, f"Official account_2: {token_dict['gzh_name']}\nReplaced on: {token_dict['gzh_time']}\nRate limited; scan the QR code to switch to another official account token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
@@ -330,8 +330,8 @@ class GongzhonghaoFollow2:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList exception: {e}\n")
 
@@ -492,18 +492,18 @@ class GongzhonghaoFollow2:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":
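All three error branches (`invalid session`, `freq control`, missing `app_msg_list`) share one shape: log the raw response, page an operator via Feishu only between 10:00 and 20:00, then back off ten minutes before retrying. A sketch of that guard factored out (the `notify` callable stands in for `Feishu.bot`):

```python
import datetime
import time

def back_off_and_maybe_alert(notify, message):
    # Page humans only during working hours; overnight the loop just waits.
    if 10 <= datetime.datetime.now().hour <= 20:
        notify(message)
    time.sleep(60 * 10)  # ten-minute back-off before the next attempt
```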

+ 9 - 9
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py

@@ -330,8 +330,8 @@ class GongzhonghaoFollow3:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                    time.sleep(random.randint(60, 60*3))
+                    Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                    time.sleep(random.randint(60*5, 60*10))
         except Exception as e:
             Common.logger(log_type, crawler).error("get_videoList exception: {}\n", e)
 
@@ -492,18 +492,18 @@ class GongzhonghaoFollow3:
 
     @classmethod
     def get_all_videos(cls, log_type, crawler, oss_endpoint, env):
-        try:
-            user_list = cls.get_users()
-            for user_dict in user_list:
+        user_list = cls.get_users()
+        for user_dict in user_list:
+            try:
                 user_name = user_dict['user_name']
                 index = user_dict['index']
                 Common.logger(log_type, crawler).info(f'Fetching videos for official account {user_name}\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('Random sleep 60-60*3 seconds\n')
-                time.sleep(random.randint(60, 60*3))
-        except Exception as e:
-            Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
+                Common.logger(log_type, crawler).info('Random sleep 60*5-60*10 seconds\n')
+                time.sleep(random.randint(60*5, 60*10))
+            except Exception as e:
+                Common.logger(log_type, crawler).info(f'get_all_videos exception: {e}\n')
 
 
 if __name__ == "__main__":

+ 1 - 1
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py

@@ -469,7 +469,7 @@ class XiaoniangaoHour:
         try:
             befor_yesterday = (datetime.date.today() + datetime.timedelta(days=-3)).strftime("%Y-%m-%d %H:%M:%S")
             update_time_stamp = int(time.mktime(time.strptime(befor_yesterday, "%Y-%m-%d %H:%M:%S")))
-            select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} """
+            select_sql = f""" select * from crawler_xiaoniangao_hour where crawler_time_stamp >= {update_time_stamp} GROUP BY out_video_id """
             update_video_list = MysqlHelper.get_values(log_type, crawler, select_sql, env)
             if len(update_video_list) == 0:
                 Common.logger(log_type, crawler).info("No hourly-ranking data to update\n")
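A note on the new query: `SELECT *` combined with `GROUP BY out_video_id` only executes when MySQL's `ONLY_FULL_GROUP_BY` mode is disabled, and the non-grouped columns then come from an arbitrary row of each group. A sketch of a standards-safe variant that pins each group to its latest crawl (assuming newest-row-wins is the intended semantics; table and column names are from the query above):

```python
update_time_stamp = 0  # stand-in for the timestamp computed above

# Ties on crawler_time_stamp within one out_video_id would still return
# multiple rows; add a unique column (e.g. the primary key) to break them.
select_sql = f"""
    SELECT t.*
    FROM crawler_xiaoniangao_hour AS t
    JOIN (
        SELECT out_video_id, MAX(crawler_time_stamp) AS max_ts
        FROM crawler_xiaoniangao_hour
        WHERE crawler_time_stamp >= {update_time_stamp}
        GROUP BY out_video_id
    ) AS latest
      ON t.out_video_id = latest.out_video_id
     AND t.crawler_time_stamp = latest.max_ts
"""
```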