Browse Source

xigua recommend etl

ehlxr 1 year ago
parent
commit
2605f375e3
1 changed file with 18 additions and 6 deletions
  1. 18 6
      xigua/xigua_recommend/xigua_recommend_scheduling.py

+ 18 - 6
xigua/xigua_recommend/xigua_recommend_scheduling.py

@@ -16,6 +16,7 @@ from requests.adapters import HTTPAdapter
 from selenium import webdriver
 from selenium.webdriver import DesiredCapabilities
 from selenium.webdriver.chrome.service import Service
+from common.mq import MQ
 sys.path.append(os.getcwd())
 from common.userAgent import get_random_user_agent
 from common.publish import Publish
@@ -602,6 +603,7 @@ class XiguarecommendScheduling:
 
     @classmethod
     def get_videoList(cls, log_type, crawler, our_uid, rule_dict, env):
+        mq = MQ(topic_name="topic_crawler_etl_" + env)
         queryCount = 1
         for i in range(100):
             Common.logger(log_type, crawler).info(f"正在抓取第{queryCount}页视频")
@@ -706,12 +708,22 @@ class XiguarecommendScheduling:
                                 Common.logger(log_type, crawler).info('视频已下载\n')
                                 Common.logging(log_type, crawler, env, "视频已下载\n")
                             else:
-                                cls.download_publish(log_type=log_type,
-                                                     crawler=crawler,
-                                                     our_uid=our_uid,
-                                                     video_dict=video_dict,
-                                                     rule_dict=rule_dict,
-                                                     env=env)
+                                # cls.download_publish(log_type=log_type,
+                                #                      crawler=crawler,
+                                #                      our_uid=our_uid,
+                                #                      video_dict=video_dict,
+                                #                      rule_dict=rule_dict,
+                                #                      env=env)
+                                video_dict["out_user_id"] = video_dict["user_id"]
+                                video_dict["platform"] = crawler
+                                video_dict["strategy"] = log_type
+                                video_dict["out_video_id"] = video_dict["video_id"]
+                                video_dict["width"] = video_dict["video_width"]
+                                video_dict["height"] = video_dict["video_height"]
+                                video_dict["crawler_rule"] = json.dumps(rule_dict)
+                                video_dict["user_id"] = our_uid
+                                video_dict["publish_time"] = video_dict["publish_time_str"]
+                                mq.send_msg(video_dict)
                         except Exception as e:
                             Common.logger(log_type, crawler).error(f"抓取单条视频时异常:{e}\n")
                             Common.logging(log_type, crawler, env, f"抓取单条视频时异常:{e}\n")