|
@@ -8,6 +8,7 @@ import sys
|
|
import time
|
|
import time
|
|
import requests
|
|
import requests
|
|
from hashlib import md5
|
|
from hashlib import md5
|
|
|
|
+from common.mq import MQ
|
|
sys.path.append(os.getcwd())
|
|
sys.path.append(os.getcwd())
|
|
from douyin.douyin_recommend import get_xb
|
|
from douyin.douyin_recommend import get_xb
|
|
from common.common import Common
|
|
from common.common import Common
|
|
@@ -55,6 +56,7 @@ class DouyinrecommendScheduling:
|
|
|
|
|
|
@classmethod
|
|
@classmethod
|
|
def get_videoList(cls, log_type, crawler, rule_dict, our_uid, env):
|
|
def get_videoList(cls, log_type, crawler, rule_dict, our_uid, env):
|
|
|
|
+ mq = MQ(topic_name="topic_crawler_etl_" + env)
|
|
for page in range(1, 101):
|
|
for page in range(1, 101):
|
|
Common.logger(log_type, crawler).info(f"正在抓取第{page}页\n")
|
|
Common.logger(log_type, crawler).info(f"正在抓取第{page}页\n")
|
|
Common.logging(log_type, crawler, env, f"正在抓取第{page}页\n")
|
|
Common.logging(log_type, crawler, env, f"正在抓取第{page}页\n")
|
|
@@ -136,12 +138,22 @@ class DouyinrecommendScheduling:
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
Common.logger(log_type, crawler).info('视频已下载\n')
|
|
Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
Common.logging(log_type, crawler, env, '视频已下载\n')
|
|
else:
|
|
else:
|
|
- cls.download_publish(log_type=log_type,
|
|
|
|
- crawler=crawler,
|
|
|
|
- our_uid=our_uid,
|
|
|
|
- video_dict=video_dict,
|
|
|
|
- rule_dict=rule_dict,
|
|
|
|
- env=env)
|
|
|
|
|
|
+ # cls.download_publish(log_type=log_type,
|
|
|
|
+ # crawler=crawler,
|
|
|
|
+ # our_uid=our_uid,
|
|
|
|
+ # video_dict=video_dict,
|
|
|
|
+ # rule_dict=rule_dict,
|
|
|
|
+ # env=env)
|
|
|
|
+ video_dict["out_user_id"] = video_dict["user_id"]
|
|
|
|
+ video_dict["platform"] = crawler
|
|
|
|
+ video_dict["strategy"] = log_type
|
|
|
|
+ video_dict["out_video_id"] = video_dict["video_id"]
|
|
|
|
+ video_dict["width"] = video_dict["video_width"]
|
|
|
|
+ video_dict["height"] = video_dict["video_height"]
|
|
|
|
+ video_dict["crawler_rule"] = json.dumps(rule_dict)
|
|
|
|
+ video_dict["user_id"] = our_uid
|
|
|
|
+ video_dict["publish_time"] = video_dict["publish_time_str"]
|
|
|
|
+ mq.send_msg(video_dict)
|
|
except Exception as e:
|
|
except Exception as e:
|
|
Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
|
|
Common.logger(log_type, crawler).error(f"抓取单条视频异常:{e}\n")
|
|
Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
|
|
Common.logging(log_type, crawler, env, f"抓取单条视频异常:{e}\n")
|