Browse Source

Merge branch 'master' of https://git.yishihui.com/Server/piaoquan_crawler

lierqiang 2 years ago
parent
commit
06250ee87c

+ 3 - 1
README.MD

@@ -130,7 +130,9 @@ ps aux | grep run_xiaoniangao_play | grep -v grep | awk '{print $2}' | xargs kil
 #### 公众号
 ```commandline
 阿里云 102 服务器
-定向爬虫策略: /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+定向爬虫策略: 
+/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow.log
+/usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
 线下调试
 定向爬虫策略: sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
 杀进程命令

+ 24 - 26
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py

@@ -118,28 +118,29 @@ class GongzhonghaoFollow:
                 if r.json()["base_resp"]["err_msg"] == "invalid session" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    # Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 elif r.json()["base_resp"]["err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    # Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                elif "list" not in r.json() or len(r.json()["list"]) == 0 and 21 >= datetime.datetime.now().hour >= 10:
+                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
+                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                    # Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    time.sleep(60 * 10)
+                    os.system(
+                        "ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                 else:
                     break
-            if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
-                os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
-                return
-            else:
-                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
-                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
-                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
-                return fakeid_dict
+
+            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
+            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
+            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
+            return fakeid_dict
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
 
@@ -243,29 +244,26 @@ class GongzhonghaoFollow:
                         "err_msg"] == "invalid session" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).info(f"response:{r.text}")
-                        Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                        # Feishu.bot(log_type, crawler, "token_1过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
                         os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     elif r.json()["base_resp"][
                         "err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                        Feishu.bot(log_type, crawler,
-                                   "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        # Feishu.bot(log_type, crawler,"公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
                         os.system("ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
+                    elif 'app_msg_list' not in r.json() and 21 >= datetime.datetime.now().hour >= 10:
+                        Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
+                        Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                        # Feishu.bot(log_type, crawler, "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        time.sleep(60 * 10)
+                        os.system(
+                            "ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
                     else:
                         break
-                if 'app_msg_list' not in r.json():
-                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler,
-                               "公众号_1频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    os.system(
-                        "ps aux | grep run_gongzhonghao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9")
-                    return
-                elif len(r.json()['app_msg_list']) == 0:
+                if len(r.json()['app_msg_list']) == 0:
                     Common.logger(log_type, crawler).info('没有更多视频了\n')
                     return
                 else:

+ 21 - 24
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py

@@ -118,28 +118,27 @@ class GongzhonghaoFollow2:
                 if r.json()["base_resp"]["err_msg"] == "invalid session" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                    # Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
                 elif r.json()["base_resp"]["err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                     Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                     Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    # Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                     time.sleep(60 * 10)
                     os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
+                elif "list" not in r.json() or len(r.json()["list"]) == 0 and 21 >= datetime.datetime.now().hour >= 10:
+                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
+                    # Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                    time.sleep(60 * 10)
+                    os.system(
+                        "ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
                 else:
                     break
-            if "list" not in r.json() or len(r.json()["list"]) == 0:
-                Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                time.sleep(60 * 10)
-                os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
-                return
-            else:
-                fakeid = r.json()["list"][int(index) - 1]["fakeid"]
-                head_url = r.json()["list"][int(index) - 1]["round_head_img"]
-                fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
-                return fakeid_dict
+            fakeid = r.json()["list"][int(index) - 1]["fakeid"]
+            head_url = r.json()["list"][int(index) - 1]["round_head_img"]
+            fakeid_dict = {'fakeid': fakeid, 'head_url': head_url}
+            return fakeid_dict
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_fakeid异常:{e}\n")
 
@@ -243,27 +242,25 @@ class GongzhonghaoFollow2:
                         "err_msg"] == "invalid session" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).info(f"response:{r.text}")
-                        Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
+                        # Feishu.bot(log_type, crawler, "token_2过期啦,请扫码更换token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
                         os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
                     elif r.json()["base_resp"][
                         "err_msg"] == "freq control" and 21 >= datetime.datetime.now().hour >= 10:
                         Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
                         Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                        Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        # Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
+                        time.sleep(60 * 10)
+                        os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
+                    elif 'app_msg_list' not in r.json() and 21 >= datetime.datetime.now().hour >= 10:
+                        Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
+                        Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
+                        # Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
                         time.sleep(60 * 10)
                         os.system("ps aux | grep run_gongzhonghao_follow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
                     else:
                         break
-                if 'app_msg_list' not in r.json():
-                    Common.logger(log_type, crawler).info(f"status_code:{r.status_code}")
-                    Common.logger(log_type, crawler).warning(f"get_gzh_url:{r.text}\n")
-                    Feishu.bot(log_type, crawler, "公众号_2频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/")
-                    time.sleep(60 * 10)
-                    os.system(
-                        "ps aux | grep run_gongzhonghaofollow_2 | grep -v grep | awk '{print $2}' | xargs kill -9")
-                    return
-                elif len(r.json()['app_msg_list']) == 0:
+                if len(r.json()['app_msg_list']) == 0:
                     Common.logger(log_type, crawler).info('没有更多视频了\n')
                     return
                 else:

BIN
main/.DS_Store


+ 13 - 13
main/process.sh

@@ -38,19 +38,19 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-50个账号 进程状态正常\n" >> ${log_path}
 fi
 
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略51-100个账号 进程状态" >> ${log_path}
-#ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-50个账号 进程状态正常\n" >> ${log_path}
-#fi
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略51-100个账号 进程状态" >> ${log_path}
+ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  if [ ${env} = "dev" ];then
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/nohup-follow-2.log
+  else
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/nohup-follow-2.log
+  fi
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!\n" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略51-100个账号 进程状态正常\n" >> ${log_path}
+fi
 
 
 # 小年糕定向爬虫策略

+ 0 - 0
weixinzhishu/weixinzhishu_hot/__init__.py → weixinzhishu/weixinzhishu_hot_search/__init__.py


+ 4 - 0
weixinzhishu/weixinzhishu_hot/weixinzhishu_douyin.py → weixinzhishu/weixinzhishu_hot_search/weixinzhishu_hot_search.py

@@ -1,3 +1,7 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2023/3/27
+
+
+class HotSearch:
+    pass

+ 13 - 0
weixinzhishu/weixinzhishu_main/run_weixinzhishu_hot_search.py

@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/3/31
+import datetime
+
+from weixinzhishu.weixinzhishu_hot_search.weixinzhishu_hot_search import HotSearch
+
+
+class Main:
+    @classmethod
+    def main(cls):
+        if datetime.datetime.now().hour == 12:
+            HotSearch.wechat_hot_search()

+ 1 - 1
weixinzhishu/weixinzhishu_main/run_weixinzhishu.py → weixinzhishu/weixinzhishu_main/run_weixinzhishu_score.py

@@ -6,7 +6,7 @@ import os
 import sys
 sys.path.append(os.getcwd())
 from common.common import Common
-from weixinzhishu.weixinzhishu_main.get_weixinzhishu import Weixinzhishu
+from weixinzhishu.weixinzhishu_score.weixinzhishu_score import Weixinzhishu
 
 
 class Main:

+ 3 - 0
weixinzhishu/weixinzhishu_score/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/3/31

+ 0 - 0
weixinzhishu/weixinzhishu_main/get_weixinzhishu.py → weixinzhishu/weixinzhishu_score/weixinzhishu_score.py