Browse Source

update gongzhonghao

wangkun 2 years ago
parent
commit
91cdbf14df

+ 4 - 4
common/common.py

@@ -69,10 +69,10 @@ class Common:
             if name == ".log":
                 all_logs.append(log)
 
-        if len(all_logs) <= 10:
+        if len(all_logs) <= 20:
             pass
         else:
-            for file in all_logs[:len(all_logs) - 10]:
+            for file in all_logs[:len(all_logs) - 20]:
                 os.remove(log_dir + file)
         cls.logger(log_type, crawler).info("清除日志成功\n")
 
@@ -297,7 +297,7 @@ class Common:
         # 用户名密码方式
         username = "t17772369458618"
         password = "5zqcjkmy"
-        proxies = {
+        tunnel_proxies = {
             "http": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel},
             "https": "http://%(user)s:%(pwd)s@%(proxy)s/" % {"user": username, "pwd": password, "proxy": tunnel}
         }
@@ -315,7 +315,7 @@ class Common:
         # # 使用隧道域名发送请求
         # response = requests.get(target_url, proxies=proxies)
         # print(response.text)
-        return proxies
+        return tunnel_proxies
 
 
 if __name__ == "__main__":

File diff suppressed because it is too large
+ 4 - 2
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow.py


File diff suppressed because it is too large
+ 4 - 3
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_2.py


+ 2 - 1
gongzhonghao/gongzhonghao_follow/gongzhonghao_follow_3.py

@@ -295,7 +295,8 @@ class GongzhonghaoFollow3:
                         # title
                         if 'title' in article_url:
                             title = article_url['title'].replace('/', '').replace('\n', '') \
-                                .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')
+                                .replace('.', '').replace('“', '').replace('”', '').replace(' ', '')\
+                                .replace('"', '').replace("'", "")
                         else:
                             title = 0
 

+ 15 - 15
main/process.sh

@@ -24,7 +24,7 @@ cd ~ && source /etc/profile
 echo "$(date "+%Y-%m-%d %H:%M:%S") 更新环境变量完成!" >> ${log_path}
 
 # 公众号爬虫策略
-echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略 1-120个账号 进程状态" >> ${log_path}
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略 1-100个账号 进程状态" >> ${log_path}
 #echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略1-40个账号 进程状态" >> ${log_path}
 ps -ef | grep "run_gongzhonghao_follow.py" | grep -v "grep"
 if [ "$?" -eq 1 ];then
@@ -36,22 +36,22 @@ if [ "$?" -eq 1 ];then
   fi
   echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
 else
-  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-40个账号 进程状态正常" >> ${log_path}
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略1-100个账号 进程状态正常" >> ${log_path}
 fi
 
-#echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略41-81个账号 进程状态" >> ${log_path}
-#ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
-#if [ "$?" -eq 1 ];then
-#  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
-#  if [ ${env} = "dev" ];then
-#    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-2.log
-#  else
-#    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-2.log
-#  fi
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
-#else
-#  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略41-81个账号 进程状态正常" >> ${log_path}
-#fi
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略101-145个账号 进程状态" >> ${log_path}
+ps -ef | grep "run_gongzhonghao_follow_2.py" | grep -v "grep"
+if [ "$?" -eq 1 ];then
+  echo "$(date "+%Y-%m-%d_%H:%M:%S") 异常停止,正在重启!" >> ${log_path}
+  if [ ${env} = "dev" ];then
+    cd ${piaoquan_crawler_dir} && sh main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="dev" gongzhonghao/logs/nohup-follow-2.log
+  else
+    cd ${piaoquan_crawler_dir} && /usr/bin/sh /data5/piaoquan_crawler/main/scheduling_main.sh ./gongzhonghao/gongzhonghao_main/run_gongzhonghao_follow_2.py --log_type="follow-2" --crawler="gongzhonghao" --env="prod"  gongzhonghao/logs/nohup-follow-2.log
+  fi
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 重启完成!" >> ${log_path}
+else
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 公众号爬虫策略101-145个账号 进程状态正常" >> ${log_path}
+fi
 #
 #echo "$(date "+%Y-%m-%d %H:%M:%S") 正在监测 公众号爬虫策略81-121个账号 进程状态" >> ${log_path}
 #ps -ef | grep "run_gongzhonghao_follow_3.py" | grep -v "grep"

Some files were not shown because too many files changed in this diff