Browse Source

小年糕增加新日志

zhangyong 1 year ago
parent
commit
3c6f57e6e3
1 changed file with 81 additions and 50 deletions
  1. 81 50
      xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling2.py

+ 81 - 50
xiaoniangaoplus/xiaoniangaoplus/xiaoniangao_plus_scheduling2.py

@@ -5,6 +5,7 @@ import json
 import os
 import sys
 import time
+import uuid
 from hashlib import md5
 
 import requests
@@ -16,10 +17,11 @@ from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
 import multiprocessing
 
+
 sys.path.append(os.getcwd())
+from common import AliyunLogger, PiaoQuanPipeline
 from common.common import Common
 from common.mq import MQ
-from common.public import download_rule, get_config_from_mysql
 from common.scheduling_db import MysqlHelper
 
 
@@ -48,7 +50,7 @@ class XiaoNianGaoPlusRecommend:
         self.rule_dict = rule_dict
         self.our_uid = our_uid
         if self.env == "dev":
-            chromedriverExecutable = "/Users/a123456/Downloads/chromedriver_V111/chromedriver"
+            chromedriverExecutable = "/Users/tzld/Downloads/chromedriver_V111/chromedriver"
         else:
             chromedriverExecutable = "/Users/a123456/Downloads/chromedriver_v111/chromedriver"
 
@@ -58,7 +60,7 @@ class XiaoNianGaoPlusRecommend:
         caps = {
             "platformName": "Android",
             "devicesName": "Android",
-            "platformVersion": "13",
+            # "platformVersion": "13",
             # "udid": "emulator-5554",
             "appPackage": "com.tencent.mm",
             "appActivity": ".ui.LauncherUI",
@@ -76,7 +78,17 @@ class XiaoNianGaoPlusRecommend:
             "chromedriverExecutable": chromedriverExecutable,
             "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
         }
-        self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        try:
+            self.driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        except:
+            AliyunLogger.logging(
+                code="3002",
+                platform=self.platform,
+                mode=self.log_type,
+                env=self.env,
+                message="appium 启动异常"
+            )
+            return
         self.driver.implicitly_wait(30)
 
         for i in range(120):
@@ -84,6 +96,13 @@ class XiaoNianGaoPlusRecommend:
                 if self.driver.find_elements(By.ID, "com.tencent.mm:id/f2s"):
                     Common.logger(self.log_type, self.crawler).info("微信启动成功")
                     Common.logging(self.log_type, self.crawler, self.env, '微信启动成功')
+                    AliyunLogger.logging(
+                        code="1000",
+                        platform=self.platform,
+                        mode=self.log_type,
+                        env=self.env,
+                        message="启动微信成功"
+                    )
                     break
                 elif self.driver.find_element(By.ID, "com.android.systemui:id/dismiss_view"):
                     Common.logger(self.log_type, self.crawler).info("发现并关闭系统下拉菜单")
@@ -92,17 +111,30 @@ class XiaoNianGaoPlusRecommend:
                 else:
                     pass
             except NoSuchElementException:
+                AliyunLogger.logging(
+                    code="3001",
+                    platform=self.platform,
+                    mode=self.log_type,
+                    env=self.env,
+                    message="打开微信异常"
+                )
                 time.sleep(1)
 
         Common.logger(self.log_type, self.crawler).info("下滑,展示小程序选择面板")
-        # Common.logging(self.log_type, self.crawler, self.env, '下滑,展示小程序选择面板')
         size = self.driver.get_window_size()
         self.driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2),
                           int(size['width'] * 0.5), int(size['height'] * 0.8), 200)
         time.sleep(1)
         Common.logger(self.log_type, self.crawler).info('打开小程序"小年糕+"')
-        # Common.logging(self.log_type, self.crawler, self.env, '打开小程序"小年糕+"')
         self.driver.find_elements(By.XPATH, '//*[@text="小年糕+"]')[-1].click()
+        AliyunLogger.logging(
+            code="1000",
+            platform=self.platform,
+            env=self.env,
+            mode=self.log_type,
+            message="打开小程序小年糕+成功"
+
+        )
         time.sleep(5)
         self.get_videoList()
         time.sleep(1)
@@ -133,6 +165,13 @@ class XiaoNianGaoPlusRecommend:
                 self.driver.find_element(By.XPATH, xpath)
                 Common.logger(self.log_type, self.crawler).info("切换到WebView成功\n")
                 Common.logging(self.log_type, self.crawler, self.env, '切换到WebView成功\n')
+                AliyunLogger.logging(
+                    code="1000",
+                    platform=self.platform,
+                    mode=self.log_type,
+                    env=self.env,
+                    message="成功切换到 webview"
+                )
                 return
             except NoSuchElementException:
                 time.sleep(1)
@@ -186,7 +225,16 @@ class XiaoNianGaoPlusRecommend:
             return
         self.count += 1
         Common.logger(self.log_type, self.crawler).info(f"第{self.count}条视频")
-        # Common.logging(self.log_type, self.crawler, self.env, f"第{self.count}条视频")
+        # 获取 trace_id, 并且把该 id 当做视频生命周期唯一索引
+        trace_id = self.crawler + str(uuid.uuid1())
+        AliyunLogger.logging(
+            code="1001",
+            platform=self.platform,
+            mode=self.log_type,
+            env=self.env,
+            trace_id=trace_id,
+            message="扫描到一条视频",
+        )
         # 标题
         video_title = video_element.find("wx-view", class_="dynamic--title").text
         # 播放量字符串
@@ -223,6 +271,7 @@ class XiaoNianGaoPlusRecommend:
         video_dict = {
             "video_title": video_title,
             "video_id": out_video_id,
+            'out_video_id': out_video_id,
             "duration_str": duration_str,
             "duration": duration,
             "play_str": play_str,
@@ -235,64 +284,33 @@ class XiaoNianGaoPlusRecommend:
             "user_id": out_user_id,
             'publish_time_stamp': int(time.time()),
             'publish_time_str': time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+            'update_time_stamp': int(time.time()),
             "avatar_url": avatar_url,
             "cover_url": cover_url,
             "session": f"xiaoniangao-{int(time.time())}"
         }
-        for k, v in video_dict.items():
-            Common.logger(self.log_type, self.crawler).info(f"{k}:{v}")
-        Common.logging(self.log_type, self.crawler, self.env, f"video_dict:{video_dict}")
-        # Common.logger(self.log_type, self.crawler).info(f"==========分割线==========\n")
-
-        if video_title is None or cover_url is None:
-            Common.logger(self.log_type, self.crawler).info("无效视频\n")
-            Common.logging(self.log_type, self.crawler, self.env, '无效视频\n')
-            # self.swipe_up()
-            time.sleep(0.5)
-        elif download_rule(log_type=self.log_type,
-                           crawler=self.crawler,
-                           video_dict=video_dict,
-                           rule_dict=self.rule_dict) is False:
-            Common.logger(self.log_type, self.crawler).info("不满足抓取规则\n")
-            Common.logging(self.log_type, self.crawler, self.env, "不满足抓取规则\n")
-            # self.swipe_up()
-            time.sleep(0.5)
-        elif any(str(word) if str(word) in video_dict["video_title"] else False
-                 for word in get_config_from_mysql(log_type=self.log_type,
-                                                   source=self.crawler,
-                                                   env=self.env,
-                                                   text="filter",
-                                                   action="")) is True:
-            Common.logger(self.log_type, self.crawler).info('已中过滤词\n')
-            Common.logging(self.log_type, self.crawler, self.env, '已中过滤词\n')
-            # self.swipe_up()
-            time.sleep(0.5)
-        elif self.repeat_video(out_video_id) != 0:
-            Common.logger(self.log_type, self.crawler).info('视频已下载\n')
-            Common.logging(self.log_type, self.crawler, self.env, '视频已下载\n')
-            # self.swipe_up()
-            time.sleep(5)
-        else:
+        pipeline = PiaoQuanPipeline(
+            platform=self.crawler,
+            mode=self.log_type,
+            item=video_dict,
+            rule_dict=self.rule_dict,
+            env=self.env,
+            trace_id=trace_id
+        )
+        flag = pipeline.process_item()
+        if flag:
             video_title_element = self.search_elements(f'//*[contains(text(), "{video_title}")]')
             if video_title_element is None:
-                Common.logger(self.log_type, self.crawler).warning(
-                    f"未找到该视频标题的element:{video_title_element}")
-                Common.logging(self.log_type, self.crawler, self.env,
-                               f"未找到该视频标题的element:{video_title_element}")
-                # continue
                 return
             Common.logger(self.log_type, self.crawler).info("点击标题,进入视频详情页")
             Common.logging(self.log_type, self.crawler, self.env, "点击标题,进入视频详情页")
             video_url = self.get_video_url(video_title_element)
             video_url = get_redirect_url(video_url)
             if video_url is None:
-                Common.logger(self.log_type, self.crawler).info("未获取到视频播放地址\n")
                 self.driver.press_keycode(AndroidKey.BACK)
                 time.sleep(5)
                 return
             video_dict['video_url'] = video_url
-            Common.logger(self.log_type, self.crawler).info(f"video_url:{video_url}")
-
             video_dict["platform"] = self.crawler
             video_dict["strategy"] = self.log_type
             video_dict["out_video_id"] = video_dict["video_id"]
@@ -310,7 +328,13 @@ class XiaoNianGaoPlusRecommend:
             self.get_video_info_2(video_element)
         except Exception as e:
             Common.logger(self.log_type, self.crawler).error(f"抓取单条视频异常:{e}\n")
-            # Common.logging(self.log_type, self.crawler, self.env, f"抓取单条视频异常:{e}\n")
+            AliyunLogger.logging(
+                code="3001",
+                platform=self.platform,
+                mode=self.log_type,
+                env=self.env,
+                message=f"抓取单条视频异常:{e}\n"
+            )
 
     def get_videoList(self):
         self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
@@ -323,6 +347,13 @@ class XiaoNianGaoPlusRecommend:
         if self.search_elements('//*[@class="list-list--list"]') is None:
             Common.logger(self.log_type, self.crawler).info("窗口已销毁\n")
             Common.logging(self.log_type, self.crawler, self.env, '窗口已销毁\n')
+            AliyunLogger.logging(
+                code="3000",
+                platform=self.platform,
+                mode=self.log_type,
+                env=self.env,
+                message="窗口已销毁"
+            )
             self.count = 0
             self.download_cnt = 0
             self.element_list = []