Browse Source

douyin author etl

ehlxr 2 năm trước
mục cha
commit
611517b7d2
1 tập tin đã thay đổi với 20 bổ sung và 6 xóa
  1. 20 6
      douyin/douyin_author/douyin_author_scheduling.py

+ 20 - 6
douyin/douyin_author/douyin_author_scheduling.py

@@ -8,6 +8,8 @@ import sys
 import time
 import requests
 from hashlib import md5
+
+from common.mq import MQ
 sys.path.append(os.getcwd())
 from common.common import Common
 from common.scheduling_db import MysqlHelper
@@ -54,6 +56,7 @@ class DouyinauthorScheduling:
 
     @classmethod
     def get_videoList(cls, log_type, crawler, user_dict, rule_dict, env):
+        mq = MQ(topic_name="topic_crawler_etl_" + env)
         max_cursor = ""
         # while True:
         url = "https://www.douyin.com/aweme/v1/web/aweme/post/?device_platform=webapp&aid=6383&channel=channel_pc_web&sec_user_id={sec_user_id}&max_cursor={max_cursor}&show_live_replay_strategy=1&count=10&publish_video_strategy_type=2&pc_client_type=1&version_code=170400&version_name=17.4.0&cookie_enabled=true&screen_width=1440&screen_height=900&browser_language=zh-CN&browser_platform=MacIntel&browser_name=Chrome&browser_version=112.0.0.0&browser_online=true&engine_name=Blink&engine_version=112.0.0.0&os_name=Mac+OS&os_version=10.15.7&cpu_core_num=8&device_memory=8&platform=PC&downlink=10&effective_type=4g&round_trip_time=50".format(
@@ -146,12 +149,23 @@ class DouyinauthorScheduling:
                     Common.logger(log_type, crawler).info('视频已下载\n')
                     Common.logging(log_type, crawler, env, '视频已下载\n')
                 else:
-                    cls.download_publish(log_type=log_type,
-                                         crawler=crawler,
-                                         user_dict=user_dict,
-                                         video_dict=video_dict,
-                                         rule_dict=rule_dict,
-                                         env=env)
+                    # cls.download_publish(log_type=log_type,
+                    #                      crawler=crawler,
+                    #                      user_dict=user_dict,
+                    #                      video_dict=video_dict,
+                    #                      rule_dict=rule_dict,
+                    #                      env=env)
+                    video_dict["out_user_id"] = video_dict["user_id"]
+                    video_dict["platform"] = crawler
+                    video_dict["strategy"] = log_type
+                    video_dict["out_video_id"] = video_dict["video_id"]
+                    video_dict["width"] = video_dict["video_width"]
+                    video_dict["height"] = video_dict["video_height"]
+                    video_dict["crawler_rule"] = json.dumps(rule_dict)
+                    video_dict["user_id"] = user_dict["uid"]
+                    video_dict["publish_time"] = video_dict["publish_time_str"]
+
+                    mq.send_msg(video_dict)
             except Exception as e:
                 Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
                 Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")