wangkun 2 年 前
コミット
8439093230

+ 3 - 1
README.MD

@@ -130,7 +130,9 @@ ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kil
 #### 公众号
 ```commandline
 阿里云 102 服务器
-定向爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+定向爬虫策略: 
+/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
 线下调试
 定向爬虫策略: sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
 杀进程命令

+ 17 - 5
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -120,17 +120,22 @@ class GongzhonghaoFollow:
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
+                    os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 elif r.json()["base_resp"]["err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
+                    os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 else:
                     break
             if "list" not in r.json() or len(r.json()["list"]) == 0:
                 Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text},休眠 1 秒\n")
-                time.sleep(1)
+                Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                return
             else:
                 fakeid = r.json()["list"][int(index) - 1]["fakeid"]
                 head_url = r.json()["list"][int(index) - 1]["round_head_img"]
@@ -241,21 +246,28 @@ class GongzhonghaoFollow:
                         Common.logger(log_type, crawler).info(f"response:{r.text}")
                         Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
+                        os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     elif r.json()["base_resp"][
                         "err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                        Feishu.bot(log_type, crawler,
-                                   "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        Feishu.bot(log_type, crawler,"公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
+                        os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     else:
                         break
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    break
+                    Feishu.bot(log_type, crawler,
+                               "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    time.sleep(60 * 10)
+                    os.system(
+                        "ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                    return
                 elif len(r.json()['app_msg_list']) == 0:
                     Common.logger(log_type, crawler).info('没有更多视频了\n')
+                    return
                 else:
                     cls.begin += 5
                     app_msg_list = r.json()['app_msg_list']

+ 19 - 3
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -120,17 +120,24 @@ class GongzhonghaoFollow2:
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
+                    os.system("ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 elif r.json()["base_resp"]["err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                     Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
+                    os.system(
+                        "ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 else:
                     break
             if "list" not in r.json() or len(r.json()["list"]) == 0:
                 Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                Common.logger(log_type, crawler).warning(f"get_fakeid:{r.text},休眠 1 秒\n")
-                time.sleep(1)
+                Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                time.sleep(60 * 10)
+                os.system(
+                    "ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                return
             else:
                 fakeid = r.json()["list"][int(index) - 1]["fakeid"]
                 head_url = r.json()["list"][int(index) - 1]["round_head_img"]
@@ -241,20 +248,29 @@ class GongzhonghaoFollow2:
                         Common.logger(log_type, crawler).info(f"response:{r.text}")
                         Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
+                        os.system(
+                            "ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     elif r.json()["base_resp"][
                         "err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
                         Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
+                        os.system(
+                            "ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     else:
                         break
                 if 'app_msg_list' not in r.json():
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    break
+                    Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    time.sleep(60 * 10)
+                    os.system(
+                        "ps aux | grep run_gongzhonghao_follow_2.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                    return
                 elif len(r.json()['app_msg_list']) == 0:
                     Common.logger(log_type, crawler).info('没有更多视频了\n')
+                    return
                 else:
                     cls.begin += 5
                     app_msg_list = r.json()['app_msg_list']

BIN
main/.DS_Store


+ 13 - 13
main/process.sh

@@ -33,19 +33,19 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-50个账号 进程状态正常\n" >> ${log_path}
 fi
 
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略51-100个账号 进程状态" >> ${log_path}
-#ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-50个账号 进程状态正常\n" >> ${log_path}
-#fi
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略51-100个账号 进程状态" >> ${log_path}
+ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  if [ ${env} = "dev" ];then
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
+  else
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
+  fi
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-50个账号 进程状态正常\n" >> ${log_path}
+fi
 
 
 # 小年糕定向爬虫策略

+ 13 - 0
weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py

@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/3/31
+import datetime
+
+from weixinzhishu.weixinzhishu_hot_search.weixinzhishu_hot_search import HotSearch
+
+
+class Main:
+    @classmethod
+    def main(cls):
+        if datetime.datetime.now().hour == 12:
+            HotSearch.wechat_hot_search()

+ 3 - 0
weixinzhishu/weixinzhishu_score/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/3/31