Ver código fonte

douyin recommend etl

ehlxr 2 anos atrás
pai
commit
736ea925eb

+ 18 - 6
douyin/douyin_recommend/douyin_recommend_scheduling.py

@@ -8,6 +8,7 @@ import sys
 import time
 import requests
 from hashlib import md5
+from common.mq import MQ
 sys.path.append(os.getcwd())
 from douyin.douyin_recommend import get_xb
 from common.common import Common
@@ -55,6 +56,7 @@ class DouyinrecommendScheduling:
 
     @classmethod
     def get_videoList(cls, log_type, crawler, rule_dict, our_uid, env):
+        mq = MQ(topic_name="topic_crawler_etl_" + env)
         for page in range(1, 101):
             Common.logger(log_type, crawler).info(f"正在抓取第{page}页\n")
             Common.logging(log_type, crawler, env, f"正在抓取第{page}页\n")
@@ -136,12 +138,22 @@ class DouyinrecommendScheduling:
                             Common.logger(log_type, crawler).info('视频已下载\n')
                             Common.logging(log_type, crawler, env, '视频已下载\n')
                         else:
-                            cls.download_publish(log_type=log_type,
-                                                 crawler=crawler,
-                                                 our_uid=our_uid,
-                                                 video_dict=video_dict,
-                                                 rule_dict=rule_dict,
-                                                 env=env)
+                            # cls.download_publish(log_type=log_type,
+                            #                      crawler=crawler,
+                            #                      our_uid=our_uid,
+                            #                      video_dict=video_dict,
+                            #                      rule_dict=rule_dict,
+                            #                      env=env)
+                            video_dict["out_user_id"] = video_dict["user_id"]
+                            video_dict["platform"] = crawler
+                            video_dict["strategy"] = log_type
+                            video_dict["out_video_id"] = video_dict["video_id"]
+                            video_dict["width"] = video_dict["video_width"]
+                            video_dict["height"] = video_dict["video_height"]
+                            video_dict["crawler_rule"] = json.dumps(rule_dict)
+                            video_dict["user_id"] = our_uid
+                            video_dict["publish_time"] = video_dict["publish_time_str"]
+                            mq.send_msg(video_dict)
                     except Exception as e:
                         Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                         Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")