Преглед на файлове

update chromedriverExecutable

piaoquan преди 1 година
родител
ревизия
f18bc2f879

+ 2 - 0
kanyikan/kanyikan_main/run_kykoffline_recommend.py

@@ -27,6 +27,8 @@ def run(args1, args2, args3, args4, args5):
 class ZFQZMain:
     @classmethod
     def zhufuquanzi_main(cls, log_type, crawler, topic_name, group_id, env):
+        topic_name="kykoffline_recommend_prod"
+        group_id="kykoffline_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_feed.py

@@ -22,7 +22,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanRecommend:
-    platform = "看一看"
+    platform = "看一看-feed流"
     strategy = "feed流"
 
     @classmethod

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_offline.py

@@ -30,7 +30,7 @@ class KanyikanRecommend:
         if env == "dev":
             chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
         else:
-            chromedriverExecutable = "/Users/crawler/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(log_type, crawler).info("启动微信")
         Common.logging(log_type, crawler, env, '启动微信')

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_plus.py

@@ -19,7 +19,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanRecommend:
-    platform = "看一看"
+    platform = "看一看-plus"
     strategy = "随机数据抓取"
 
     @classmethod

+ 1 - 1
kanyikan/kanyikan_recommend/kanyikan_recommend_video_id.py

@@ -22,7 +22,7 @@ proxies = {"http": None, "https": None}
 
 
 class KanyikanViodeRecommend:
-    platform = "看一看"
+    platform = "看一看-feed流"
     strategy = "video_id-feed流"
 
 

+ 6 - 0
main/process_mq.sh

@@ -74,6 +74,12 @@ elif [ ${crawler} = "xngrule" ] && [ ${log_type} = "recommend" ];then
   python=python3
   log_path=${piaoquan_crawler_dir}main/main_logs/process-mq-$(date +%Y-%m-%d).log
 
+elif [ ${crawler} = "zfqz" ] && [ ${log_type} = "recommend" ];then
+  piaoquan_crawler_dir=/Users/tzld/Desktop/piaoquan_crawler/
+  profile_path=/.base_profile
+  python=python3
+  log_path=${piaoquan_crawler_dir}main/main_logs/process-mq-$(date +%Y-%m-%d).log
+
 elif [ ${crawler} = "xnguser" ] && [ ${log_type} = "recommend" ];then
   piaoquan_crawler_dir=/Users/tzld/Desktop/piaoquan_crawler/
   profile_path=/.base_profile

+ 19 - 2
main/process_offline.sh

@@ -35,6 +35,8 @@ if [[ "$time" > "00:30:00"  &&  "$time" < "01:59:59" || "$time" > "05:00:00"  &&
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_zhongmiaoyinxin | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_jixiangxingfu_recommend.py" | grep -v "grep"
   if [ "$?" -eq 1 ];then
     echo "$(date "+%Y-%m-%d %H:%M:%S") 吉祥幸福爬虫, 异常停止, 正在重启!" >> ${log_path}
@@ -72,7 +74,7 @@ else
 fi
 
 # 小年糕-rule
-if [[ "$time" > "21:00:00"  &&  "$time" < "21:59:59" || "$time" > "22:00:00"  &&  "$time" < "22:29:59" ]];then
+if [[ "$time" > "21:00:00"  &&  "$time" < "21:59:59" || "$time" > "12:00:00"  &&  "$time" < "12:29:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 小年糕-rule 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
@@ -118,11 +120,12 @@ fi
 
 
 # 祝福圈子
-if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "12:00:00"  &&  "$time" < "13:59:59" ]];then
+if [[ "$time" > "07:00:00"  &&  "$time" < "08:59:59" || "$time" > "13:00:00"  &&  "$time" < "13:59:59" ]];then
   echo "$(date "+%Y-%m-%d %H:%M:%S") 开始启动 祝福圈子 爬虫脚本任务" >> ${log_path}
 #  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
 #  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
   ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
   ps -ef | grep "run_zfqz_recommend.py" | grep -v "grep"
@@ -140,6 +143,20 @@ else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 祝福圈子 爬虫脚本任务结束" >> ${log_path}
 fi
 
+
+
+
+if [[ "$time" > "14:00:00"  &&  "$time" < "14:00:59" || "$time" > "22:00:00"  &&  "$time" < "22:00:59" ]];then
+  echo "$(date "+%Y-%m-%d %H:%M:%S") 爬虫脚本任务结束" >> ${log_path}
+#  ps aux | grep run_htzf | grep -v grep | awk '{print $2}' | xargs kill -9
+#  ps aux | grep run_ganggangdouchuan | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngplus | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zfqz | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_xngrule | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_jixiangxingfu | grep -v grep | awk '{print $2}' | xargs kill -9
+  ps aux | grep run_zmyx | grep -v grep | awk '{print $2}' | xargs kill -9
+fi
+
 # 删除日志
 echo "$(date "+%Y-%m-%d %H:%M:%S") 开始清理 10 天前的日志文件" >> ${log_path}
 find ${piaoquan_crawler_dir}main/main_logs/ -mtime +10 -name "*.log" -exec rm -rf {} \;

+ 7 - 7
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_get_userid.py

@@ -38,7 +38,7 @@ class XiaoNianGaoPlusRecommend:
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
         self.mq = None
-        self.platform = "小年糕"
+        self.platform = "小年糕账号ID"
         self.download_cnt = 0
         self.element_list = []
         self.count = 0
@@ -49,9 +49,9 @@ class XiaoNianGaoPlusRecommend:
         self.rule_dict = rule_dict
         self.our_uid = our_uid
         if self.env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_V111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(self.log_type, self.crawler).info("启动微信")
         # 微信的配置文件
@@ -187,14 +187,14 @@ class XiaoNianGaoPlusRecommend:
         Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
         self.get_video_url(video_title_element)
 
-        video_mid_elements = self.search_elements("//wx-view[@class='bar--navBar-content-capsule']")
+        video_mid_elements = self.search_elements("//wx-view[@class='bar--navBar-content-capsule-wrap']")
         mid = int(video_mid_elements[0].get_attribute("data-mid"))
         repeat_video_id= self.repeat_video_id(mid)
         data_list = []
         if repeat_video_id != 0:
             Common.logger(self.log_type, self.crawler).info(f"该用户已经存在")
-            status = 1
-            self.insert_user(mid, user_name, data_list, status)
+            # status = 0
+            # self.insert_user(mid, user_name, data_list, status)
 
             self.driver.press_keycode(AndroidKey.BACK)
             return
@@ -282,7 +282,7 @@ class XiaoNianGaoPlusRecommend:
 
 
     def repeat_video_id(self,mid):
-        sql = f"SELECT `uid`  FROM `crawler_user_v3` WHERE  `source` = 'xiaoniangao'  and `uid` = {mid}"
+        sql = f"SELECT `link`  FROM `crawler_user_v3` WHERE  `source` = 'xiaoniangao'  and `link` = {mid}"
         repeat_video_id = MysqlHelper.get_values(self.log_type, self.crawler, sql, self.env)
         return len(repeat_video_id)
 

+ 7 - 6
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling2.py

@@ -39,7 +39,7 @@ class XiaoNianGaoPlusRecommend:
 
     def __init__(self, log_type, crawler, env, rule_dict, our_uid):
         self.mq = None
-        self.platform = "小年糕"
+        self.platform = "小年糕+"
         self.download_cnt = 0
         self.element_list = []
         self.count = 0
@@ -50,9 +50,9 @@ class XiaoNianGaoPlusRecommend:
         self.rule_dict = rule_dict
         self.our_uid = our_uid
         if self.env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_V111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/a123456/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(self.log_type, self.crawler).info("启动微信")
         Common.logging(self.log_type, self.crawler, self.env, '启动微信')
@@ -60,7 +60,7 @@ class XiaoNianGaoPlusRecommend:
         caps = {
             "platformName": "Android",
             "devicesName": "Android",
-            # "platformVersion": "13",
+            # "platformVersion": "11",
             # "udid": "emulator-5554",
             "appPackage": "com.tencent.mm",
             "appActivity": ".ui.LauncherUI",
@@ -80,13 +80,14 @@ class XiaoNianGaoPlusRecommend:
         }
         try:
             self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
-        except:
+        except Exception as e:
+            print(e)
             AliyunLogger.logging(
                 code="3002",
                 platform=self.platform,
                 mode=self.log_type,
                 env=self.env,
-                message="appium 启动异常"
+                message=f'appium 启动异常: {e}'
             )
             return
         self.driver.implicitly_wait(30)

+ 2 - 0
xiaoniangaoplus/xiaoniangaoplus_main/run_xngplus_recommend.py

@@ -27,6 +27,8 @@ def run(args1, args2, args3, args4, args5):
 
 
 def main(log_type, crawler, topic_name, group_id, env):
+    topic_name = "xngplus_recommend_prod"
+    group_id = "xngplus_recommend_prod"
     consumer = get_consumer(topic_name, group_id)
     # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
     # 长轮询时间3秒(最多可设置为30秒)。

+ 2 - 0
xiaoniangaoplus/xiaoniangaoplus_main/run_xngrule_recommend.py

@@ -29,6 +29,8 @@ def run(args1, args2, args3, args4, args5):
 class Main:
     @classmethod
     def main(cls, log_type, crawler, topic_name, group_id, env):
+        topic_name = "xngrule_recommend_prod"
+        group_id = "xngrule_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。

+ 1 - 1
zhongmiaoyinxin/zhongmiaoyinxin_recommend/zhongmiaoyinxin_recommend_new.py

@@ -37,7 +37,7 @@ class ZMYXRecommend:
         if self.env == "dev":
             chromedriverExecutable = "/Users/luojunhui/Downloads/chromedriver_V111/chromedriver"
         else:
-            chromedriverExecutable = '/Users/luojunhui/Downloads/chromedriver_V111/chromedriver'  # Mac 爬虫机器
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         # 微信的配置文件
         caps = {
             "platformName": "Android",  # 手机操作系统 Android / iOS

+ 2 - 0
zhufuquanzi/zhufuquanzi_main/run_zfqz_recommend.py

@@ -28,6 +28,8 @@ def run(args1, args2, args3, args4, args5):
 class ZFQZMain:
     @classmethod
     def zhufuquanzi_main(cls, log_type, crawler, topic_name, group_id, env):
+        group_id = "zfqz_recommend_prod"
+        topic_name = "zfqz_recommend_prod"
         consumer = get_consumer(topic_name, group_id)
         # 长轮询表示如果Topic没有消息,则客户端请求会在服务端挂起3秒,3秒内如果有消息可以消费则立即返回响应。
         # 长轮询时间3秒(最多可设置为30秒)。

+ 5 - 4
zhufuquanzi/zhufuquanzi_recommend/zhufuquanzi_recommend_new.py

@@ -16,10 +16,9 @@ from selenium.webdriver.common.by import By
 
 
 sys.path.append(os.getcwd())
-from common import AliyunLogger, PiaoQuanPipeline
+from common import AliyunLogger, PiaoQuanPipeline, get_redirect_url
 from common.common import Common
 from common.mq import MQ
-from common.public import download_rule, get_config_from_mysql
 from common.scheduling_db import MysqlHelper
 
 
@@ -32,9 +31,9 @@ class ZFQZRecommend:
     @classmethod
     def start_wechat(cls, log_type, crawler, env, rule_dict, our_uid):
         if env == "dev":
-            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
         else:
-            chromedriverExecutable = "/Users/crawler/Downloads/chromedriver_v111/chromedriver"
+            chromedriverExecutable = "/Users/piaoquan/Downloads/chromedriver"
 
         Common.logger(log_type, crawler).info("启动微信")
         Common.logging(log_type, crawler, env, '启动微信')
@@ -302,6 +301,7 @@ class ZFQZRecommend:
                         comment_cnt = int(comment_str)
                     out_video_id = md5(video_title.encode('utf8')).hexdigest()
                     out_user_id = md5(user_name.encode('utf8')).hexdigest()
+                    Common.logger(log_type, crawler).warning(f"视频标题:{video_title},点赞:{like_str},播放:{play_cnt},用户名称:{user_name},")
 
                     video_dict = {
                         "video_title": video_title,
@@ -347,6 +347,7 @@ class ZFQZRecommend:
                             message=f"点击标题,进入视频详情页\n"
                         )
                         video_url = cls.get_video_url(log_type, crawler, driver, video_title_element)
+                        video_url = get_redirect_url(video_url)
                         if video_url is None:
                             driver.press_keycode(AndroidKey.BACK)
                             time.sleep(5)