wangkun 2 years ago
parent
commit
7b50951897

+ 22 - 13
README.MD

@@ -57,17 +57,6 @@ ps aux | grep run_youtube
 ps aux | grep run_youtube | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
 
-#### 微信指数
-```commandline
-微信指数杀进程
-nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py >>/data5/piaoquan_crawler/weixinzhishu/nohup_inner_sort.log 2>&1 &
-nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py >>/data5/piaoquan_crawler/weixinzhishu/nohup_inner_long.log 2>&1 &
-nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py >>/data5/piaoquan_crawler/weixinzhishu/nohup_out.log 2>&1 &
-ps aux | grep run_weixinzhishu
-ps aux | grep weixinzhishu | grep -v grep | awk '{print $2}' | xargs kill -9
-获取 wechat_key 设备: Mac Air 
-cd ~ && source ./base_profile && ps aux | grep weixinzhishu | grep -v grep | awk '{print $2}' | xargs kill -9 && cd /Users/piaoquan/Desktop/piaoquan_crawler && nohup python3 -u weixinzhishu/weixinzhishu_key/search_key_mac.py >> weixinzhishu/nohup.log 2>&1 &
-```
 
 #### 西瓜视频
 ```commandline
@@ -101,6 +90,7 @@ ps aux | grep run_kuaishou
 ps aux | grep run_kuaishou | grep -v grep | awk '{print $2}' | xargs kill -9
 ```
 
+
 #### 小年糕
 ```commandline
 阿里云 102 服务器
@@ -133,17 +123,36 @@ ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kil
 定向爬虫策略: 
 /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
 /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
+/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-3.log
 线下调试
-定向爬虫策略: sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
+定向爬虫策略: 
+sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
+sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-2.log
+sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-3.log
 杀进程命令
 ps aux | grep run_gongzhonghao
 ps aux | grep run_gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9 
 ```
 
 
+#### 微信指数
+```commandline
+获取站外标题, crontab定时脚本, 每天 12 点运行一次
+00 12 * * * nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup-hot-search.log 2>&1 &
+获取微信指数
+nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_long.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_long.log 2>&1 &
+nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_out.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_out.log 2>&1 &
+nohup python3 -u /data5/piaoquan_crawler/weixinzhishu/weixinzhishu_main/weixinzhishu_inner_sort.py >>/data5/piaoquan_crawler/weixinzhishu/logs/nohup_inner_sort.log 2>&1 &
+ps aux | grep run_weixinzhishu
+ps aux | grep weixinzhishu | grep -v grep | awk '{print $2}' | xargs kill -9
+获取 wechat_key 设备: Mac Air 
+cd ~ && source ./base_profile && ps aux | grep weixinzhishu | grep -v grep | awk '{print $2}' | xargs kill -9 && cd /Users/piaoquan/Desktop/piaoquan_crawler && nohup python3 -u weixinzhishu/weixinzhishu_key/search_key_mac.py >> weixinzhishu/nohup.log 2>&1 &
+```
+
+
 #### 爬虫进程监测
 ```commandline
 阿里云 102 服务器:/usr/bin/sh /data5/piaoquan_crawler/main/process.sh "prod"
 香港 服务器:/usr/bin/sh /root/piaoquan_crawler/main/process.sh "hk"
-线下调试:cd /Users/wangkun/Desktop/crawler/piaoquan_crawler/ && sh main/process.sh "dev"
+线下调试:sh /Users/wangkun/Desktop/crawler/piaoquan_crawler/main/process.sh "dev"
 ```
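Throughout the README the crawlers are stopped with the same pipeline: `ps aux | grep <name> | grep -v grep | awk '{print $2}' | xargs kill -9` (list processes, drop the grep itself, take the PID column, force-kill). The relocated 微信指数 block also adds a crontab entry, `00 12 * * *`, i.e. run once a day at 12:00. A minimal Python sketch of the kill pipeline, stdlib only and with an illustrative pattern:

```python
import os
import signal
import subprocess

def kill_matching(pattern: str) -> None:
    """Rough equivalent of:
    ps aux | grep <pattern> | grep -v grep | awk '{print $2}' | xargs kill -9"""
    out = subprocess.check_output(["ps", "aux"], text=True)
    for line in out.splitlines()[1:]:           # skip the ps header row
        if pattern in line and "grep" not in line:
            pid = int(line.split()[1])          # column 2 of `ps aux` is the PID
            os.kill(pid, signal.SIGKILL)        # kill -9

kill_matching("run_gongzhonghao")               # illustrative pattern
```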

+ 14 - 9
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -73,7 +73,11 @@ class GongzhonghaoFollow:
                     continue
                 token = sheet[0][1]
                 cookie = sheet[1][1]
-                token_dict = {'token': token, 'cookie': cookie}
+                gzh_name = sheet[2][1]
+                gzh_time = sheet[3][1]
+                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
+                print(type(token_dict['gzh_time']))
+                print(token_dict['gzh_time'])
                 return token_dict
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
@@ -120,21 +124,21 @@ class GongzhonghaoFollow:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if "list" not in r.json() or len(r.json()["list"]) == 0:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
 
@@ -244,21 +248,21 @@ class GongzhonghaoFollow:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler, f"token_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler,"公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler,f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     if 20 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler, f"公众号_1:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if len(r.json()['app_msg_list']) == 0:
@@ -507,7 +511,8 @@ class GongzhonghaoFollow:
 
 
 if __name__ == "__main__":
-    GongzhonghaoFollow.get_users()
+    GongzhonghaoFollow.get_token(log_type="follow", crawler="gongzhonghao")
+    # GongzhonghaoFollow.get_users()
     # GongzhonghaoFollow.get_videoList(log_type="follow",
     #                                  crawler="gongzhonghao",
     #                                  user="香音难忘",
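The gongzhonghao_follow.py changes read two extra rows from the Feishu token sheet, the account name (gzh_name) and the date the token was last replaced (gzh_time), and interpolate them into every expiry and freq-control alert so the on-call person can see which account to refresh and how stale its token is. The alert window of the third check is also tightened from hour 21 to 20, matching the others. The two bare print() calls on gzh_time and the get_token call in __main__ look like leftover debugging. A condensed sketch of the two pieces (sheet layout and message text are taken from the diff; `bot` is a stand-in for Feishu.bot):

```python
import datetime

def build_token_dict(sheet) -> dict:
    # Feishu sheet layout assumed from the diff: row 0 token, row 1 cookie,
    # row 2 account name, row 3 date the token was last replaced.
    return {
        "token": sheet[0][1],
        "cookie": sheet[1][1],
        "gzh_name": sheet[2][1],
        "gzh_time": sheet[3][1],
    }

def alert_token_expired(bot, token_dict: dict, idx: int) -> None:
    # 20 >= hour >= 10 keeps alerts inside working hours (10:00-20:59).
    if 10 <= datetime.datetime.now().hour <= 20:
        bot(f"token_{idx}:{token_dict['gzh_name']}\n"
            f"更换日期:{token_dict['gzh_time']}\n"
            f"过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
```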

+ 19 - 17
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -73,7 +73,9 @@ class GongzhonghaoFollow2:
                     continue
                 token = sheet[0][1]
                 cookie = sheet[1][1]
-                token_dict = {'token': token, 'cookie': cookie}
+                gzh_name = sheet[2][1]
+                gzh_time = sheet[3][1]
+                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
                 return token_dict
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
@@ -118,22 +120,22 @@ class GongzhonghaoFollow2:
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if "list" not in r.json() or len(r.json()["list"]) == 0:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 fakeid = r.json()["list"][int(index) - 1]["fakeid"]
@@ -241,22 +243,22 @@ class GongzhonghaoFollow2:
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).info(f"response:{r.text}")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"token_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_2:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if len(r.json()['app_msg_list']) == 0:
@@ -328,8 +330,8 @@ class GongzhonghaoFollow2:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('随机休眠 0-60 秒\n')
-                    time.sleep(random.randint(0, 60))
+                    Common.logger(log_type, crawler).info('随机休眠 60-60*3 秒\n')
+                    time.sleep(random.randint(60, 60*3))
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videoList异常:{e}\n")
 
@@ -498,8 +500,8 @@ class GongzhonghaoFollow2:
                 Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('随机休眠 0-60 秒\n')
-                time.sleep(random.randint(0, 60))
+                Common.logger(log_type, crawler).info('随机休眠 60-60*3 秒\n')
+                time.sleep(random.randint(60, 60*3))
         except Exception as e:
             Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')
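gongzhonghao_follow_2.py picks up the same token-sheet and alert changes, and both sleep sites move from `random.randint(0, 60)` to `random.randint(60, 60*3)`: a jittered 1-3 minute pause between pages and between accounts, lowering the request rate against the freq control seen in the alerts above. The pattern as a tiny helper:

```python
import random
import time

def jittered_pause(lo: int = 60, hi: int = 60 * 3) -> None:
    # Random 60-180 s sleep so consecutive requests are not evenly spaced.
    time.sleep(random.randint(lo, hi))
```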
 

+ 21 - 19
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py

@@ -26,7 +26,7 @@ from common.publish import Publish
 from common.scheduling_db import MysqlHelper
 
 
-class GongzhonghaoFollow2:
+class GongzhonghaoFollow3:
     # 翻页参数
     begin = 0
     platform = "公众号"
@@ -73,7 +73,9 @@ class GongzhonghaoFollow2:
                     continue
                 token = sheet[0][1]
                 cookie = sheet[1][1]
-                token_dict = {'token': token, 'cookie': cookie}
+                gzh_name = sheet[2][1]
+                gzh_time = sheet[3][1]
+                token_dict = {'token': token, 'cookie': cookie, 'gzh_name': gzh_name, 'gzh_time': gzh_time}
                 return token_dict
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"get_cookie_token异常:{e}\n")
@@ -118,22 +120,22 @@ class GongzhonghaoFollow2:
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if "list" not in r.json() or len(r.json()["list"]) == 0:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 fakeid = r.json()["list"][int(index) - 1]["fakeid"]
@@ -241,22 +243,22 @@ class GongzhonghaoFollow2:
                 if r.json()["base_resp"]["err_msg"] == "invalid session":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"token_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if r.json()["base_resp"]["err_msg"] == "freq control":
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-                    if 21 >= datetime.datetime.now().hour >= 10:
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    if 20 >= datetime.datetime.now().hour >= 10:
+                        Feishu.bot(log_type, crawler, f"公众号_3:{token_dict['gzh_name']}\n更换日期:{token_dict['gzh_time']}\n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     continue
                 if len(r.json()['app_msg_list']) == 0:
@@ -328,8 +330,8 @@ class GongzhonghaoFollow2:
                             return
                         cls.download_publish(log_type, crawler, video_dict, oss_endpoint, env)
 
-                    Common.logger(log_type, crawler).info('随机休眠 0-60 秒\n')
-                    time.sleep(random.randint(0, 60))
+                    Common.logger(log_type, crawler).info('随机休眠 60-60*3 秒\n')
+                    time.sleep(random.randint(60, 60*3))
         except Exception as e:
             Common.logger(log_type, crawler).error("get_videoList异常:{}\n", e)
 
@@ -498,14 +500,14 @@ class GongzhonghaoFollow2:
                 Common.logger(log_type, crawler).info(f'获取 {user_name} 公众号视频\n')
                 cls.get_videoList(log_type, crawler, user_name, index, oss_endpoint, env)
                 cls.begin = 0
-                Common.logger(log_type, crawler).info('随机休眠 0-60 秒\n')
-                time.sleep(random.randint(0, 60))
+                Common.logger(log_type, crawler).info('随机休眠 60-60*3 秒\n')
+                time.sleep(random.randint(60, 60*3))
         except Exception as e:
             Common.logger(log_type, crawler).info(f'get_all_videos异常:{e}\n')
 
 
 if __name__ == "__main__":
-    GongzhonghaoFollow2.get_users()
+    GongzhonghaoFollow3.get_users()
     # GongzhonghaoFollow.get_users()
     # GongzhonghaoFollow.get_videoList(log_type="follow",
     #                                  crawler="gongzhonghao",
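gongzhonghao_follow_3.py is a copy of gongzhonghao_follow_2.py with the class renamed GongzhonghaoFollow3, the alert labels bumped to token_3/公众号_3, and the same hour-window and sleep adjustments; __main__ now calls the renamed class. With three near-identical files to keep in sync, one alternative (purely a sketch, not what the repo does) would be a single class parameterized by account group:

```python
class GongzhonghaoFollow:
    def __init__(self, group: int):
        # group 1/2/3 selects the token sheet and alert labels for one
        # batch of roughly 40 公众号 accounts (per the README and process.sh).
        self.group = group

    @property
    def token_label(self) -> str:
        return f"token_{self.group}"

    @property
    def account_label(self) -> str:
        return f"公众号_{self.group}"
```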

+ 15 - 18
gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py

@@ -4,7 +4,6 @@
 import argparse
 import os
 import sys
-import time
 sys.path.append(os.getcwd())
 from common.common import Common
 from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow import GongzhonghaoFollow
@@ -13,23 +12,21 @@ from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow import GongzhonghaoFol
 class Main:
     @classmethod
     def main(cls, log_type, crawler, env):
-        while True:
-            try:
-                if env == "dev":
-                    oss_endpoint = "out"
-                else:
-                    oss_endpoint = "inner"
-                Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
-                GongzhonghaoFollow.get_all_videos(log_type=log_type,
-                                                  crawler=crawler,
-                                                  oss_endpoint=oss_endpoint,
-                                                  env=env)
-                Common.del_logs(log_type, crawler)
-                GongzhonghaoFollow.begin = 0
-                Common.logger(log_type, crawler).info('休眠 8 小时\n')
-                time.sleep(3600*8)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
+        try:
+            if env == "dev":
+                oss_endpoint = "out"
+            else:
+                oss_endpoint = "inner"
+            Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
+            GongzhonghaoFollow.get_all_videos(log_type=log_type,
+                                              crawler=crawler,
+                                              oss_endpoint=oss_endpoint,
+                                              env=env)
+            Common.del_logs(log_type, crawler)
+            GongzhonghaoFollow.begin = 0
+            Common.logger(log_type, crawler).info('公众号抓取一轮完毕\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
 
 
 if __name__ == '__main__':

+ 15 - 18
gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py

@@ -4,7 +4,6 @@
 import argparse
 import os
 import sys
-import time
 sys.path.append(os.getcwd())
 from common.common import Common
 from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow_2 import GongzhonghaoFollow2
@@ -13,23 +12,21 @@ from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow_2 import GongzhonghaoF
 class Main:
     @classmethod
     def main(cls, log_type, crawler, env):
-        while True:
-            try:
-                if env == "dev":
-                    oss_endpoint = "out"
-                else:
-                    oss_endpoint = "inner"
-                Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
-                GongzhonghaoFollow2.get_all_videos(log_type=log_type,
-                                                   crawler=crawler,
-                                                   oss_endpoint=oss_endpoint,
-                                                   env=env)
-                Common.del_logs(log_type, crawler)
-                GongzhonghaoFollow2.begin = 0
-                Common.logger(log_type, crawler).info('休眠 8 小时\n')
-                time.sleep(3600*8)
-            except Exception as e:
-                Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
+        try:
+            if env == "dev":
+                oss_endpoint = "out"
+            else:
+                oss_endpoint = "inner"
+            Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
+            GongzhonghaoFollow2.get_all_videos(log_type=log_type,
+                                               crawler=crawler,
+                                               oss_endpoint=oss_endpoint,
+                                               env=env)
+            Common.del_logs(log_type, crawler)
+            GongzhonghaoFollow2.begin = 0
+            Common.logger(log_type, crawler).info('公众号抓取一轮完毕\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
 
 
 if __name__ == '__main__':
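run_gongzhonghao_follow.py and run_gongzhonghao_follow_2.py lose their internal `while True` / `time.sleep(3600*8)` loop (and the now-unused `time` import): each invocation performs one crawl round, logs 公众号抓取一轮完毕, and exits, with restarts delegated to main/process.sh below. A minimal sketch of the shape change (`run_one_round` is a stand-in for get_all_videos):

```python
import time

def run_one_round() -> None:
    print("crawl one round")  # stand-in for GongzhonghaoFollowN.get_all_videos

def main_before() -> None:
    # Old shape: the runner supervised itself and slept 8 h between rounds.
    while True:
        run_one_round()
        time.sleep(3600 * 8)

def main_after() -> None:
    # New shape: one round per process; process.sh relaunches the script.
    run_one_round()

if __name__ == "__main__":
    main_after()
```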

+ 40 - 0
gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py

@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/3/28
+import argparse
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from gongzhonghao.gongzhonghao_follow.gongzhonghao_follow_3 import GongzhonghaoFollow3
+
+
+class Main:
+    @classmethod
+    def main(cls, log_type, crawler, env):
+        try:
+            if env == "dev":
+                oss_endpoint = "out"
+            else:
+                oss_endpoint = "inner"
+            Common.logger(log_type, crawler).info('开始抓取公众号视频\n')
+            GongzhonghaoFollow3.get_all_videos(log_type=log_type,
+                                               crawler=crawler,
+                                               oss_endpoint=oss_endpoint,
+                                               env=env)
+            Common.del_logs(log_type, crawler)
+            GongzhonghaoFollow3.begin = 0
+            Common.logger(log_type, crawler).info('公众号抓取一轮完毕\n')
+        except Exception as e:
+            Common.logger(log_type, crawler).info(f"公众号抓取异常:{e}\n")
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()  ## 新建参数解释器对象
+    parser.add_argument('--log_type', type=str)  ## 添加参数,注明参数类型
+    parser.add_argument('--crawler')  ## 添加参数
+    parser.add_argument('--env')  ## 添加参数
+    args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
+    Main.main(log_type=args.log_type,
+              crawler=args.crawler,
+              env=args.env)

+ 33 - 34
main/process.sh

@@ -17,11 +17,11 @@ else
   log_path=${piaoquan_crawler_dir}main/main_logs/process-$(date +%Y-%m-%d).log
 fi
 
-echo "$(date "+%Y-%m-%d %H:%M:%S") 开始监测爬虫进程状态\n" >> ${log_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 开始监测爬虫进程状态" >> ${log_path}
 
 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量..." >> ${log_path}
 cd ~ && source /etc/profile
-echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!\n" >> ${log_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
 
 # 公众号爬虫策略
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略1-40个账号 进程状态" >> ${log_path}
@@ -29,13 +29,13 @@ ps -ef | grep "run_gongzhonghao_follow.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-40个账号 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-40个账号 进程状态正常" >> ${log_path}
 fi
 
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略41-81个账号 进程状态" >> ${log_path}
@@ -43,13 +43,13 @@ ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-2.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-2.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-2.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略41-81个账号 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略41-81个账号 进程状态正常" >> ${log_path}
 fi
 
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略81-121个账号 进程状态" >> ${log_path}
@@ -57,29 +57,28 @@ ps -ef | grep "run_gongzhonghao_follow_3.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-3.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-3.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-3.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_3.py --log_type="follow-3" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-3.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略81-121个账号 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略81-121个账号 进程状态正常" >> ${log_path}
 fi
 
-
 # 小年糕定向爬虫策略
 echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 小年糕定向爬虫策略 进程状态" >> ${log_path}
 ps -ef | grep "run_xiaoniangao_follow.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="dev" xiaoniangao/nohup-follow.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-follow.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="prod"  xiaoniangao/nohup-follow.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_follow.py --log_type="follow" --crawler="xiaoniangao" --env="prod"  xiaoniangao/logs/nohup-follow.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕定向爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 # 小年糕小时榜爬虫策略
@@ -88,13 +87,13 @@ ps -ef | grep "run_xiaoniangao_hour.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/nohup-hour.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-hour.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-hour.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-hour.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 小年糕小时榜爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 # 播放量榜爬虫策略
@@ -103,13 +102,13 @@ ps -ef | grep "run_xiaoniangao_play.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
   echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
   if [ ${env} = "dev" ];then
-    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/nohup-play.log
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="dev" xiaoniangao/logs/nohup-play.log
   else
-    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/nohup-play.log
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./xiaoniangao/xiaoniangao_main/run_xiaoniangao_play.py --log_type="play" --crawler="xiaoniangao" --env="prod" xiaoniangao/logs/nohup-play.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 播放量榜爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 # 快手定向爬虫策略
@@ -122,9 +121,9 @@ if [ "$?" -eq 1 ];then
   else
     cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./kuaishou/kuaishou_main/run_kuaishou_follow.py --log_type="follow" --crawler="kuaishou" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" kuaishou/follow.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 快手定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 # 西瓜定向爬虫策略
@@ -137,9 +136,9 @@ if [ "$?" -eq 1 ];then
   else
     cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./xigua/xigua_main/run_xigua_follow.py --log_type="follow" --crawler="xigua" --strategy="定向爬虫策略" --oss_endpoint="inner" --env="prod" --machine="aliyun" xigua/follow.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜定向爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 西瓜定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 
@@ -153,14 +152,14 @@ if [ "$?" -eq 1 ];then
   else
     cd ${piaoquan_crawler_dir} && /usr/bin/sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="hk" --env="hk" --machine="aliyun_hk" youtube/follow.log
   fi
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") youtube定向爬虫策略 进程状态正常\n" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") youtube定向爬虫策略 进程状态正常" >> ${log_path}
 fi
 
 
 # 删除日志
 echo "$(date "+%Y-%m-%d %H:%M:%S") 开始清理 5 天前的日志文件" >> ${log_path}
 find ${piaoquan_crawler_dir}main/main_logs/ -mtime +5 -name "*.log" -exec rm -rf {} \;
-echo "$(date "+%Y-%m-%d %H:%M:%S") 日志文件清理完毕\n" >> ${log_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 日志文件清理完毕" >> ${log_path}
 exit 0
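process.sh probes each runner with `ps -ef | grep <script> | grep -v grep` and tests `$?` (grep exits 1 when nothing matched), restarting the runner through scheduling_main.sh when it is gone. This commit also moves the gongzhonghao and xiaoniangao nohup logs into per-crawler logs/ directories, adds the run_gongzhonghao_follow_3.py check, and drops the `\n` escapes from the echo lines (plain echo handles them inconsistently across shells). Old logs are pruned with `find ... -mtime +5 -name "*.log" -exec rm -rf {} \;`. The same watchdog check as a Python sketch (command and script name illustrative):

```python
import subprocess

def is_running(script_name: str) -> bool:
    # Equivalent of: ps -ef | grep <script> | grep -v grep
    out = subprocess.check_output(["ps", "-ef"], text=True)
    return any(script_name in line and "grep" not in line
               for line in out.splitlines())

def watchdog(script_name: str, restart_cmd: list) -> None:
    if not is_running(script_name):
        # start_new_session detaches the child, nohup-style.
        subprocess.Popen(restart_cmd, start_new_session=True)

watchdog("run_gongzhonghao_follow.py",
         ["python3", "-u",
          "gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py",
          "--log_type=follow", "--crawler=gongzhonghao", "--env=prod"])
```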

+ 1 - 1
weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py

@@ -63,5 +63,5 @@ class Main:
         Common.logger(log_type, crawler).info(f"今日热搜榜全部抓取完毕\n")
 
 if __name__ == "__main__":
-    Main.thread_main("hot-search", "weixinzhishu", "dev")
+    Main.thread_main("hot-search", "weixinzhishu", "prod")
     pass
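run_weixinzhishu_hot_search.py flips its hard-coded environment from "dev" to "prod", matching the new crontab entry in the README. A hard-coded env in __main__ is easy to forget after local testing; a sketch that takes it from the command line instead, mirroring the argparse pattern the other runners use (`thread_main` is a stand-in for Main.thread_main):

```python
import argparse

def thread_main(log_type: str, crawler: str, env: str) -> None:
    print(log_type, crawler, env)  # stand-in for Main.thread_main

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--env", default="prod")  # pass --env=dev when testing
    args = parser.parse_args()
    thread_main("hot-search", "weixinzhishu", args.env)
```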

File diff suppressed because it is too large
+ 0 - 20
xiaoniangao/xiaoniangao_follow/xiaoniangao_follow.py


+ 343 - 343
xiaoniangao/xiaoniangao_hour/xiaoniangao_hour.py
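In the xiaoniangao_hour.py hunks below, the bodies of get_videoList and get_video_info are re-indented one level because their try/except wrappers are commented out, so exceptions now propagate instead of being swallowed into the log; the logic is otherwise unchanged. Two details in the re-indented code are worth flagging. First, `if "w" or "h" in feeds[i]:` parses as `("w") or ("h" in feeds[i])`, and the non-empty string "w" is always truthy, so the else branch is dead and a feed without "w" raises KeyError. A corrected sketch:

```python
def extract_dimensions(feed: dict) -> tuple:
    # Test both keys explicitly; `if "w" or "h" in feed:` is always true.
    if "w" in feed and "h" in feed:
        return feed["w"], feed["h"]
    return 0, 0
```

Second, the INSERT into crawler_xiaoniangao_hour is built with an f-string, so a title containing a double quote breaks the statement (and is an injection vector), and its crawler_time format string `"%Y-%y-%d %H:%M:%S"` puts the two-digit year where the month presumably belongs. A parameterized sketch, assuming the connection behind MysqlHelper is pymysql-like:

```python
import time

def insert_hour_record(conn, platform: str, video: dict) -> None:
    # Placeholders let the driver escape values; no f-string interpolation.
    sql = """INSERT INTO crawler_xiaoniangao_hour
             (profile_id, profile_mid, platform, out_video_id, video_title,
              user_name, cover_url, video_url, duration, publish_time,
              play_cnt, crawler_time_stamp, crawler_time)
             VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    now = int(time.time())
    with conn.cursor() as cur:
        cur.execute(sql, (
            video["profile_id"], video["profile_mid"], platform,
            video["video_id"], video["video_title"], video["user_name"],
            video["cover_url"], video["video_url"], video["duration"],
            video["publish_time_str"], video["play_cnt"], now,
            time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now)),
        ))
    conn.commit()
```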

@@ -90,54 +90,314 @@ class XiaoniangaoHour:
     # 获取列表
     @classmethod
     def get_videoList(cls, log_type, crawler, env):
-        try:
-            uid_token_dict = cls.get_uid_token()
-            url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
-            headers = {
-                # "x-b3-traceid": cls.hour_x_b3_traceid,
-                "x-b3-traceid": '1c403a4aa72e3c',
-                # "X-Token-Id": cls.hour_x_token_id,
-                "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
-                # "uid": cls.hour_uid,
-                "uid": uid_token_dict['uid'],
-                "content-type": "application/json",
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
-                              ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
-                              'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-                # "Referer": cls.hour_referer
-                "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
+        # try:
+        uid_token_dict = cls.get_uid_token()
+        url = "https://kapi.xiaoniangao.cn/trends/get_recommend_trends"
+        headers = {
+            # "x-b3-traceid": cls.hour_x_b3_traceid,
+            "x-b3-traceid": '1c403a4aa72e3c',
+            # "X-Token-Id": cls.hour_x_token_id,
+            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
+            # "uid": cls.hour_uid,
+            "uid": uid_token_dict['uid'],
+            "content-type": "application/json",
+            "Accept-Encoding": "gzip,compress,br,deflate",
+            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
+                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
+            # "Referer": cls.hour_referer
+            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
+        }
+        data = {
+        "log_params": {
+            "page": "discover_rec",
+            "common": {
+                "brand": "iPhone",
+                "device": "iPhone 11",
+                "os": "iOS 14.7.1",
+                "weixinver": "8.0.20",
+                "srcver": "2.24.2",
+                "net": "wifi",
+                "scene": 1089
             }
-            data = {
-            "log_params": {
-                "page": "discover_rec",
-                "common": {
-                    "brand": "iPhone",
-                    "device": "iPhone 11",
-                    "os": "iOS 14.7.1",
-                    "weixinver": "8.0.20",
-                    "srcver": "2.24.2",
-                    "net": "wifi",
-                    "scene": 1089
-                }
+        },
+        "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
+        "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
+        "share_width": 625,
+        "share_height": 500,
+        "ext": {
+            "fmid": 0,
+            "items": {}
+        },
+        "app": "xng",
+        "rec_scene": "discover_rec",
+        "log_common_params": {
+            "e": [{
+                "data": {
+                    "page": "discoverIndexPage",
+                    "topic": "recommend"
+                },
+                "ab": {}
+            }],
+            "ext": {
+                "brand": "iPhone",
+                "device": "iPhone 11",
+                "os": "iOS 14.7.1",
+                "weixinver": "8.0.20",
+                "srcver": "2.24.3",
+                "net": "wifi",
+                "scene": "1089"
             },
-            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!750x500r/crop/750x500/interlace/1/format/jpg",
-            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!80x80r/crop/80x80/interlace/1/format/jpg",
+            "pj": "1",
+            "pf": "2",
+            "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
+        },
+        "refresh": False,
+        "token": uid_token_dict["token"],
+        "uid": uid_token_dict["uid"],
+        "proj": "ma",
+        "wx_ver": "8.0.20",
+        "code_ver": "3.62.0"
+    }
+
+        urllib3.disable_warnings()
+        r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
+        if 'data' not in r.text or r.status_code != 200:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
+        elif "data" not in r.json():
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
+        elif "list" not in r.json()["data"]:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
+        elif len(r.json()['data']['list']) == 0:
+            Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
+        else:
+            # 视频列表数据
+            feeds = r.json()["data"]["list"]
+            for i in range(len(feeds)):
+                # 标题,表情随机加在片头、片尾,或替代句子中间的标点符号
+                if "title" in feeds[i]:
+                    befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
+                        .replace("/", "").replace("\r", "").replace("#", "") \
+                        .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
+                        .replace(":", "").replace("*", "").replace("?", "") \
+                        .replace("?", "").replace('"', "").replace("<", "") \
+                        .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "")
+
+                    expression = cls.get_expression()
+                    expression_list = expression[0]
+                    char_list = expression[1]
+                    # 随机取一个表情
+                    expression = random.choice(expression_list)
+                    # 生成标题list[表情+title, title+表情]
+                    expression_title_list = [expression + befor_video_title, befor_video_title + expression]
+                    # 从标题list中随机取一个标题
+                    title_list1 = random.choice(expression_title_list)
+                    # 生成标题:原标题+符号
+                    title_list2 = befor_video_title + random.choice(char_list)
+                    # 表情和标题组合,与标题和符号组合,汇总成待使用的标题列表
+                    title_list4 = [title_list2, title_list1]
+                    # 最终标题
+                    video_title = random.choice(title_list4)
+                else:
+                    video_title = 0
+
+                # 视频 ID
+                if "vid" in feeds[i]:
+                    video_id = feeds[i]["vid"]
+                else:
+                    video_id = 0
+
+                # 播放量
+                if "play_pv" in feeds[i]:
+                    video_play_cnt = feeds[i]["play_pv"]
+                else:
+                    video_play_cnt = 0
+
+                # 点赞量
+                if "favor" in feeds[i]:
+                    video_like_cnt = feeds[i]["favor"]["total"]
+                else:
+                    video_like_cnt = 0
+
+                # 评论数
+                if "comment_count" in feeds[i]:
+                    video_comment_cnt = feeds[i]["comment_count"]
+                else:
+                    video_comment_cnt = 0
+
+                # 分享量
+                if "share" in feeds[i]:
+                    video_share_cnt = feeds[i]["share"]
+                else:
+                    video_share_cnt = 0
+
+                # 时长
+                if "du" in feeds[i]:
+                    video_duration = int(feeds[i]["du"] / 1000)
+                else:
+                    video_duration = 0
+
+                # 宽和高
+                if "w" or "h" in feeds[i]:
+                    video_width = feeds[i]["w"]
+                    video_height = feeds[i]["h"]
+                else:
+                    video_width = 0
+                    video_height = 0
+
+                # 发布时间
+                if "t" in feeds[i]:
+                    video_send_time = feeds[i]["t"]
+                else:
+                    video_send_time = 0
+                publish_time_stamp = int(int(video_send_time)/1000)
+                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+
+                # 用户名 / 头像
+                if "user" in feeds[i]:
+                    user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
+                        .replace("/", "").replace("快手", "").replace(" ", "") \
+                        .replace(" ", "").replace("&NBSP", "").replace("\r", "")
+                    head_url = feeds[i]["user"]["hurl"]
+                else:
+                    user_name = 0
+                    head_url = 0
+
+                # 用户 ID
+                profile_id = feeds[i]["id"]
+
+                # 用户 mid
+                profile_mid = feeds[i]["user"]["mid"]
+
+                # 视频封面
+                if "url" in feeds[i]:
+                    cover_url = feeds[i]["url"]
+                else:
+                    cover_url = 0
+
+                # 视频播放地址
+                if "v_url" in feeds[i]:
+                    video_url = feeds[i]["v_url"]
+                else:
+                    video_url = 0
+
+                video_dict = {
+                    "video_title": video_title,
+                    "video_id": video_id,
+                    "duration": video_duration,
+                    "play_cnt": video_play_cnt,
+                    "like_cnt": video_like_cnt,
+                    "comment_cnt": video_comment_cnt,
+                    "share_cnt": video_share_cnt,
+                    "user_name": user_name,
+                    "publish_time_stamp": publish_time_stamp,
+                    "publish_time_str": publish_time_str,
+                    "video_width": video_width,
+                    "video_height": video_height,
+                    "avatar_url": head_url,
+                    "profile_id": profile_id,
+                    "profile_mid": profile_mid,
+                    "cover_url": cover_url,
+                    "video_url": video_url,
+                    "session": f"xiaoniangao-hour-{int(time.time())}"
+                }
+                for k, v in video_dict.items():
+                    Common.logger(log_type, crawler).info(f"{k}:{v}")
+
+                # 过滤无效视频
+                if video_title == 0 or video_id == 0 or video_duration == 0 \
+                        or video_send_time == 0 or user_name == 0 or head_url == 0 \
+                        or cover_url == 0 or video_url == 0:
+                    Common.logger(log_type, crawler).warning("无效视频\n")
+                # 抓取基础规则过滤
+                elif cls.download_rule(video_dict) is False:
+                    Common.logger(log_type, crawler).info("不满足基础门槛规则\n")
+                elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
+                    Common.logger(log_type, crawler).info('视频已下载\n')
+                # 过滤敏感词
+                elif any(str(word) if str(word) in video_title else False for word in filter_word(log_type, crawler, "小年糕", env)) is True:
+                    Common.logger(log_type, crawler).info("视频已中过滤词\n")
+                    time.sleep(1)
+                else:
+                    # 写入飞书小时级feeds数据库表
+                    insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
+                    profile_mid,
+                    platform,
+                    out_video_id,
+                    video_title,
+                    user_name,
+                    cover_url,
+                    video_url,
+                    duration,
+                    publish_time,
+                    play_cnt,
+                    crawler_time_stamp,
+                    crawler_time)
+                    values({profile_id},
+                    {profile_mid},
+                    "{cls.platform}",
+                    "{video_id}",
+                    "{video_title}",
+                    "{user_name}",
+                    "{cover_url}",
+                    "{video_url}",
+                    {video_duration},
+                    "{publish_time_str}",
+                    {video_play_cnt},
+                    {int(time.time())},
+                    "{time.strftime("%Y-%y-%d %H:%M:%S", time.localtime(int(time.time())))}"
+                    )"""
+                    Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
+                    MysqlHelper.update_values(log_type, crawler, insert_sql, env)
+                    Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
+
+    @classmethod
+    def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
+        # try:
+        uid_token_dict = cls.get_uid_token()
+        url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
+        headers = {
+            # "x-b3-traceid": cls.hour_x_b3_traceid,
+            "x-b3-traceid": '1c403a4aa72e3c',
+            # "X-Token-Id": cls.hour_x_token_id,
+            "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
+            "uid": uid_token_dict['uid'],
+            "content-type": "application/json",
+            "Accept-Encoding": "gzip,compress,br,deflate",
+            "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
+                          ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
+                          'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
+            # "Referer": cls.hour_referer
+            "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
+        }
+        data = {
+            "play_src": "1",
+            "profile_id": int(p_id),
+            "profile_mid": int(p_mid),
+            "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/"
+                  "!400x400r/crop/400x400/interlace/1/format/jpg",
+            "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail"
+                    "/!80x80r/crop/80x80/interlace/1/format/jpg",
             "share_width": 625,
             "share_height": 500,
-            "ext": {
-                "fmid": 0,
-                "items": {}
-            },
-            "app": "xng",
-            "rec_scene": "discover_rec",
+            "no_comments": True,
+            "no_follow": True,
+            "vid": v_id,
+            "hot_l1_comment": True,
+            # "token": cls.hour_token,
+            "token": uid_token_dict['token'],
+            # "uid": cls.hour_uid,
+            "uid": uid_token_dict['uid'],
+            "proj": "ma",
+            "wx_ver": "8.0.20",
+            "code_ver": "3.62.0",
             "log_common_params": {
                 "e": [{
                     "data": {
-                        "page": "discoverIndexPage",
-                        "topic": "recommend"
-                    },
-                    "ab": {}
+                        "page": "dynamicSharePage"
+                    }
                 }],
                 "ext": {
                     "brand": "iPhone",
@@ -151,311 +411,51 @@ class XiaoniangaoHour:
                 "pj": "1",
                 "pf": "2",
                 "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
-            },
-            "refresh": False,
-            "token": uid_token_dict["token"],
-            "uid": uid_token_dict["uid"],
-            "proj": "ma",
-            "wx_ver": "8.0.20",
-            "code_ver": "3.62.0"
-        }
-
-            urllib3.disable_warnings()
-            r = requests.post(url=url, headers=headers, json=data, proxies=proxies, verify=False)
-            if 'data' not in r.text or r.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.text}\n")
-            elif "data" not in r.json():
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()}\n")
-            elif "list" not in r.json()["data"]:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']}\n")
-            elif len(r.json()['data']['list']) == 0:
-                Common.logger(log_type, crawler).warning(f"get_videoList:{r.json()['data']['list']}\n")
-            else:
-                # video list data
-                feeds = r.json()["data"]["list"]
-                for i in range(len(feeds)):
-                    # Title: an emoji is randomly added at the start or end, or replaces punctuation inside the sentence
-                    if "title" in feeds[i]:
-                        befor_video_title = feeds[i]["title"].strip().replace("\n", "") \
-                            .replace("/", "").replace("\r", "").replace("#", "") \
-                            .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
-                            .replace(":", "").replace("*", "").replace("?", "") \
-                            .replace("?", "").replace('"', "").replace("<", "") \
-                            .replace(">", "").replace("|", "").replace(" ", "").replace("#表情", "").replace("#符号", "")
-
-                        expression = cls.get_expression()
-                        expression_list = expression[0]
-                        char_list = expression[1]
-                        # pick one emoji at random
-                        expression = random.choice(expression_list)
-                        # build a title list: [emoji + title, title + emoji]
-                        expression_title_list = [expression + befor_video_title, befor_video_title + expression]
-                        # randomly pick one title from the list
-                        title_list1 = random.choice(expression_title_list)
-                        # build a title: original title + symbol
-                        title_list2 = befor_video_title + random.choice(char_list)
-                        # pool the emoji+title and title+symbol variants into the list of candidate titles
-                        title_list4 = [title_list2, title_list1]
-                        # final title
-                        video_title = random.choice(title_list4)
-                    else:
-                        video_title = 0
-
-                    # video ID
-                    if "vid" in feeds[i]:
-                        video_id = feeds[i]["vid"]
-                    else:
-                        video_id = 0
-
-                    # play count
-                    if "play_pv" in feeds[i]:
-                        video_play_cnt = feeds[i]["play_pv"]
-                    else:
-                        video_play_cnt = 0
-
-                    # like count
-                    if "favor" in feeds[i]:
-                        video_like_cnt = feeds[i]["favor"]["total"]
-                    else:
-                        video_like_cnt = 0
-
-                    # comment count
-                    if "comment_count" in feeds[i]:
-                        video_comment_cnt = feeds[i]["comment_count"]
-                    else:
-                        video_comment_cnt = 0
-
-                    # share count
-                    if "share" in feeds[i]:
-                        video_share_cnt = feeds[i]["share"]
-                    else:
-                        video_share_cnt = 0
-
-                    # duration
-                    if "du" in feeds[i]:
-                        video_duration = int(feeds[i]["du"] / 1000)
-                    else:
-                        video_duration = 0
-
-                    # width and height
-                    if "w" or "h" in feeds[i]:
-                        video_width = feeds[i]["w"]
-                        video_height = feeds[i]["h"]
-                    else:
-                        video_width = 0
-                        video_height = 0
-
-                    # publish time
-                    if "t" in feeds[i]:
-                        video_send_time = feeds[i]["t"]
-                    else:
-                        video_send_time = 0
-                    publish_time_stamp = int(int(video_send_time)/1000)
-                    publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-
-                    # username / avatar
-                    if "user" in feeds[i]:
-                        user_name = feeds[i]["user"]["nick"].strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
-                            .replace(" ", "").replace("&NBSP", "").replace("\r", "")
-                        head_url = feeds[i]["user"]["hurl"]
-                    else:
-                        user_name = 0
-                        head_url = 0
-
-                    # user ID
-                    profile_id = feeds[i]["id"]
-
-                    # user mid
-                    profile_mid = feeds[i]["user"]["mid"]
-
-                    # video cover
-                    if "url" in feeds[i]:
-                        cover_url = feeds[i]["url"]
-                    else:
-                        cover_url = 0
-
-                    # video playback URL
-                    if "v_url" in feeds[i]:
-                        video_url = feeds[i]["v_url"]
-                    else:
-                        video_url = 0
-
-                    video_dict = {
-                        "video_title": video_title,
-                        "video_id": video_id,
-                        "duration": video_duration,
-                        "play_cnt": video_play_cnt,
-                        "like_cnt": video_like_cnt,
-                        "comment_cnt": video_comment_cnt,
-                        "share_cnt": video_share_cnt,
-                        "user_name": user_name,
-                        "publish_time_stamp": publish_time_stamp,
-                        "publish_time_str": publish_time_str,
-                        "video_width": video_width,
-                        "video_height": video_height,
-                        "avatar_url": head_url,
-                        "profile_id": profile_id,
-                        "profile_mid": profile_mid,
-                        "cover_url": cover_url,
-                        "video_url": video_url,
-                        "session": f"xiaoniangao-hour-{int(time.time())}"
-                    }
-                    for k, v in video_dict.items():
-                        Common.logger(log_type, crawler).info(f"{k}:{v}")
-
-                    # filter out invalid videos
-                    if video_title == 0 or video_id == 0 or video_duration == 0 \
-                            or video_send_time == 0 or user_name == 0 or head_url == 0 \
-                            or cover_url == 0 or video_url == 0:
-                        Common.logger(log_type, crawler).warning("无效视频\n")
-                    # filter by the basic crawl rules
-                    elif cls.download_rule(video_dict) is False:
-                        Common.logger(log_type, crawler).info("不满足基础门槛规则\n")
-                    elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env) != 0:
-                        Common.logger(log_type, crawler).info('视频已下载\n')
-                    # filter sensitive words
-                    elif any(str(word) if str(word) in video_title else False for word in filter_word(log_type, crawler, "小年糕", env)) is True:
-                        Common.logger(log_type, crawler).info("视频已中过滤词\n")
-                        time.sleep(1)
-                    else:
-                        # write to the Feishu hourly feeds database table
-                        insert_sql = f""" insert into crawler_xiaoniangao_hour(profile_id,
-                        profile_mid,
-                        platform,
-                        out_video_id,
-                        video_title,
-                        user_name,
-                        cover_url,
-                        video_url,
-                        duration,
-                        publish_time,
-                        play_cnt,
-                        crawler_time_stamp,
-                        crawler_time)
-                        values({profile_id},
-                        {profile_mid},
-                        "{cls.platform}",
-                        "{video_id}",
-                        "{video_title}",
-                        "{user_name}",
-                        "{cover_url}",
-                        "{video_url}",
-                        {video_duration},
-                        "{publish_time_str}",
-                        {video_play_cnt},
-                        {int(time.time())},
-                        "{time.strftime("%Y-%y-%d %H:%M:%S", time.localtime(int(time.time())))}"
-                        )"""
-                        Common.logger(log_type, crawler).info(f"insert_sql:{insert_sql}")
-                        MysqlHelper.update_values(log_type, crawler, insert_sql, env)
-                        Common.logger(log_type, crawler).info('视频信息插入数据库成功!\n')
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_videoList:{e}\n")
-
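The `insert_sql` in the branch above is assembled with f-strings, so a title or nickname containing a double quote breaks the statement, and the removed `crawler_time` line formats the timestamp with `"%Y-%y-%d %H:%M:%S"`, putting the two-digit year where the month belongs. Below is a parameterized sketch of the same insert, assuming a plain `pymysql` connection; whether the repo's `MysqlHelper.update_values` accepts bind parameters is not visible in this diff, so this is only an illustration, not the project's actual helper.

```python
import time

import pymysql


def insert_hour_video(conn, video_dict, platform="xiaoniangao"):
    """Hypothetical parameterized version of the f-string INSERT above."""
    sql = """insert into crawler_xiaoniangao_hour
             (profile_id, profile_mid, platform, out_video_id, video_title,
              user_name, cover_url, video_url, duration, publish_time,
              play_cnt, crawler_time_stamp, crawler_time)
             values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)"""
    now = int(time.time())
    args = (
        video_dict["profile_id"],
        video_dict["profile_mid"],
        platform,
        video_dict["video_id"],
        video_dict["video_title"],  # quotes/emoji in titles can no longer break the SQL
        video_dict["user_name"],
        video_dict["cover_url"],
        video_dict["video_url"],
        video_dict["duration"],
        video_dict["publish_time_str"],
        video_dict["play_cnt"],
        now,
        time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(now)),  # "%m", not the "%y" typo above
    )
    with conn.cursor() as cursor:
        cursor.execute(sql, args)
    conn.commit()


# usage sketch (connection parameters are placeholders):
# conn = pymysql.connect(host="127.0.0.1", user="crawler", password="***", database="piaoquan")
# insert_hour_video(conn, video_dict)
```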
-    @classmethod
-    def get_video_info(cls, log_type, crawler, p_id, p_mid, v_title, v_id):
-        try:
-            uid_token_dict = cls.get_uid_token()
-            url = "https://kapi.xiaoniangao.cn/profile/get_profile_by_id"
-            headers = {
-                # "x-b3-traceid": cls.hour_x_b3_traceid,
-                "x-b3-traceid": '1c403a4aa72e3c',
-                # "X-Token-Id": cls.hour_x_token_id,
-                "X-Token-Id": 'ab619e96d801f1567388629260aa68ec-1202200806',
-                "uid": uid_token_dict['uid'],
-                "content-type": "application/json",
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": 'Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)'
-                              ' AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 '
-                              'MicroMessenger/8.0.20(0x18001432) NetType/WIFI Language/zh_CN',
-                # "Referer": cls.hour_referer
-                "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/624/page-frame.html'
             }
-            data = {
-                "play_src": "1",
-                "profile_id": int(p_id),
-                "profile_mid": int(p_mid),
-                "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/"
-                      "!400x400r/crop/400x400/interlace/1/format/jpg",
-                "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail"
-                        "/!80x80r/crop/80x80/interlace/1/format/jpg",
-                "share_width": 625,
-                "share_height": 500,
-                "no_comments": True,
-                "no_follow": True,
-                "vid": v_id,
-                "hot_l1_comment": True,
-                # "token": cls.hour_token,
-                "token": uid_token_dict['token'],
-                # "uid": cls.hour_uid,
-                "uid": uid_token_dict['uid'],
-                "proj": "ma",
-                "wx_ver": "8.0.20",
-                "code_ver": "3.62.0",
-                "log_common_params": {
-                    "e": [{
-                        "data": {
-                            "page": "dynamicSharePage"
-                        }
-                    }],
-                    "ext": {
-                        "brand": "iPhone",
-                        "device": "iPhone 11",
-                        "os": "iOS 14.7.1",
-                        "weixinver": "8.0.20",
-                        "srcver": "2.24.3",
-                        "net": "wifi",
-                        "scene": "1089"
-                    },
-                    "pj": "1",
-                    "pf": "2",
-                    "session_id": "7bcce313-b57d-4305-8d14-6ebd9a1bad29"
-                }
+        }
+        urllib3.disable_warnings()
+        r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
+        if r.status_code != 200 or 'data' not in r.text:
+            Common.logger(log_type, crawler).warning(f"get_videoInfo:{r.text}\n")
+        else:
+            hour_play_cnt = r.json()["data"]["play_pv"]
+            hour_cover_url = r.json()["data"]["url"]
+            hour_video_url = r.json()["data"]["v_url"]
+            hour_video_duration = r.json()["data"]["du"]
+            hour_video_comment_cnt = r.json()["data"]["comment_count"]
+            hour_video_like_cnt = r.json()["data"]["favor"]["total"]
+            hour_video_share_cnt = r.json()["data"]["share"]
+            hour_video_width = r.json()["data"]["w"]
+            hour_video_height = r.json()["data"]["h"]
+            hour_video_send_time = r.json()["data"]["t"]
+            publish_time_stamp = int(int(hour_video_send_time)/1000)
+            publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
+            hour_user_name = r.json()["data"]["user"]["nick"]
+            hour_head_url = r.json()["data"]["user"]["hurl"]
+            video_info_dict = {
+                "video_id": v_id,
+                "video_title": v_title,
+                "duration": hour_video_duration,
+                "play_cnt": hour_play_cnt,
+                "like_cnt": hour_video_like_cnt,
+                "comment_cnt": hour_video_comment_cnt,
+                "share_cnt": hour_video_share_cnt,
+                "user_name": hour_user_name,
+                "publish_time_stamp": publish_time_stamp,
+                "publish_time_str": publish_time_str,
+                "video_width": hour_video_width,
+                "video_height": hour_video_height,
+                "avatar_url": hour_head_url,
+                "profile_id": p_id,
+                "profile_mid": p_mid,
+                "cover_url": hour_cover_url,
+                "video_url": hour_video_url,
+                "session": f"xiaoniangao-hour-{int(time.time())}"
             }
-            urllib3.disable_warnings()
-            r = requests.post(headers=headers, url=url, json=data, proxies=proxies, verify=False)
-            if r.status_code != 200 or 'data' not in r.text:
-                Common.logger(log_type, crawler).warning(f"get_videoInfo:{r.text}\n")
-            else:
-                hour_play_cnt = r.json()["data"]["play_pv"]
-                hour_cover_url = r.json()["data"]["url"]
-                hour_video_url = r.json()["data"]["v_url"]
-                hour_video_duration = r.json()["data"]["du"]
-                hour_video_comment_cnt = r.json()["data"]["comment_count"]
-                hour_video_like_cnt = r.json()["data"]["favor"]["total"]
-                hour_video_share_cnt = r.json()["data"]["share"]
-                hour_video_width = r.json()["data"]["w"]
-                hour_video_height = r.json()["data"]["h"]
-                hour_video_send_time = r.json()["data"]["t"]
-                publish_time_stamp = int(int(hour_video_send_time)/1000)
-                publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
-                hour_user_name = r.json()["data"]["user"]["nick"]
-                hour_head_url = r.json()["data"]["user"]["hurl"]
-                video_info_dict = {
-                    "video_id": v_id,
-                    "video_title": v_title,
-                    "duration": hour_video_duration,
-                    "play_cnt": hour_play_cnt,
-                    "like_cnt": hour_video_like_cnt,
-                    "comment_cnt": hour_video_comment_cnt,
-                    "share_cnt": hour_video_share_cnt,
-                    "user_name": hour_user_name,
-                    "publish_time_stamp": publish_time_stamp,
-                    "publish_time_str": publish_time_str,
-                    "video_width": hour_video_width,
-                    "video_height": hour_video_height,
-                    "avatar_url": hour_head_url,
-                    "profile_id": p_id,
-                    "profile_mid": p_mid,
-                    "cover_url": hour_cover_url,
-                    "video_url": hour_video_url,
-                    "session": f"xiaoniangao-hour-{int(time.time())}"
-                }
-                return video_info_dict
+            return video_info_dict
 
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"download_video:{e}\n")
+        # except Exception as e:
+        #     Common.logger(log_type, crawler).error(f"download_video:{e}\n")
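With the `try`/`except` in the new `get_video_info` commented out, every direct key lookup on `r.json()["data"]` above can raise `KeyError` and abort the whole pass as soon as one profile response is missing a field. A minimal defensive-parsing sketch, not part of this commit: the response shape is assumed from the lookups above, and `parse_profile_response` is a hypothetical helper.

```python
import time


def parse_profile_response(body: dict, p_id, p_mid, v_id, v_title) -> dict:
    """Hypothetical tolerant parser for the get_profile_by_id response."""
    data = body.get("data", {})  # missing keys fall back to defaults instead of raising
    user = data.get("user", {})
    publish_time_stamp = int(data.get("t", 0)) // 1000  # ms -> s
    return {
        "video_id": v_id,
        "video_title": v_title,
        "duration": data.get("du", 0),
        "play_cnt": data.get("play_pv", 0),
        "like_cnt": data.get("favor", {}).get("total", 0),
        "comment_cnt": data.get("comment_count", 0),
        "share_cnt": data.get("share", 0),
        "user_name": user.get("nick", ""),
        "publish_time_stamp": publish_time_stamp,
        "publish_time_str": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp)),
        "video_width": data.get("w", 0),
        "video_height": data.get("h", 0),
        "avatar_url": user.get("hurl", ""),
        "profile_id": p_id,
        "profile_mid": p_mid,
        "cover_url": data.get("url", ""),
        "video_url": data.get("v_url", ""),
        "session": f"xiaoniangao-hour-{int(time.time())}",
    }


# usage sketch inside get_video_info, replacing the direct lookups:
# video_info_dict = parse_profile_response(r.json(), p_id, p_mid, v_id, v_title)
```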
 
     # update the hourly ranking data
     @classmethod

+ 30 - 30
xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py

@@ -11,38 +11,38 @@ from xiaoniangao.xiaoniangao_hour.xiaoniangao_hour import XiaoniangaoHour
 
 
 def main(log_type, crawler, env):
-    while True:
-        if env == "dev":
-            oss_endpoint = "out"
-        else:
-            oss_endpoint = "inner"
-        # fetch videos that match the rules and write them to the hourly _feeds table
-        XiaoniangaoHour.get_videoList(log_type, crawler, env)
-        now = datetime.datetime.now()
-        if now.hour == 10 and 0 <= now.minute <= 10:
-            Common.logger(log_type, crawler).info("开始更新/下载上升榜")
-            XiaoniangaoHour.update_videoList(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy="小时榜爬虫策略",
-                                             oss_endpoint=oss_endpoint,
-                                             env=env)
+    # while True:
+    if env == "dev":
+        oss_endpoint = "out"
+    else:
+        oss_endpoint = "inner"
+    # fetch videos that match the rules and write them to the hourly _feeds table
+    XiaoniangaoHour.get_videoList(log_type, crawler, env)
+    now = datetime.datetime.now()
+    if now.hour == 10 and 0 <= now.minute <= 10:
+        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+        XiaoniangaoHour.update_videoList(log_type=log_type,
+                                         crawler=crawler,
+                                         strategy="小时榜爬虫策略",
+                                         oss_endpoint=oss_endpoint,
+                                         env=env)
 
-        elif now.hour == 15 and now.minute <= 10:
-            Common.logger(log_type, crawler).info("开始更新/下载上升榜")
-            XiaoniangaoHour.update_videoList(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy="小时榜爬虫策略",
-                                             oss_endpoint=oss_endpoint,
-                                             env=env)
+    elif now.hour == 15 and now.minute <= 10:
+        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+        XiaoniangaoHour.update_videoList(log_type=log_type,
+                                         crawler=crawler,
+                                         strategy="小时榜爬虫策略",
+                                         oss_endpoint=oss_endpoint,
+                                         env=env)
 
-        elif now.hour == 20 and now.minute <= 10:
-            Common.logger(log_type, crawler).info("开始更新/下载上升榜")
-            XiaoniangaoHour.update_videoList(log_type=log_type,
-                                             crawler=crawler,
-                                             strategy="小时榜爬虫策略",
-                                             oss_endpoint=oss_endpoint,
-                                             env=env)
-        Common.del_logs(log_type, crawler)
+    elif now.hour == 20 and now.minute <= 10:
+        Common.logger(log_type, crawler).info("开始更新/下载上升榜")
+        XiaoniangaoHour.update_videoList(log_type=log_type,
+                                         crawler=crawler,
+                                         strategy="小时榜爬虫策略",
+                                         oss_endpoint=oss_endpoint,
+                                         env=env)
+    Common.del_logs(log_type, crawler)
 
 
 if __name__ == "__main__":
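With the `while True:` loop removed, `main()` now makes a single pass per invocation, so the 10:00 / 15:00 / 20:00 update windows (`now.minute <= 10`) only fire if something external re-runs the script inside those first ten minutes. A hypothetical crontab entry in the style used elsewhere in this repo; the schedule, CLI flags, and log path are assumptions, since the argument parsing under `if __name__ == "__main__":` is not shown in this diff:

```commandline
Hypothetical cron schedule: run the single-pass main() every 10 minutes so the 10:00/15:00/20:00 update windows are hit
*/10 * * * * nohup python3 -u /data5/piaoquan_crawler/xiaoniangao/xiaoniangao_main/run_xiaoniangao_hour.py --log_type="hour" --crawler="xiaoniangao" --env="prod" >>/data5/piaoquan_crawler/xiaoniangao/logs/nohup-hour.log 2>&1 &
```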

Some files were not shown because too many files changed in this diff