|
@@ -18,7 +18,7 @@ from common.scheduling_db import MysqlHelper
|
|
|
|
|
|
class HaoKanVideoAccount(object):
|
|
class HaoKanVideoAccount(object):
|
|
def __init__(self, platform, mode, rule_dict, user_dict, env):
|
|
def __init__(self, platform, mode, rule_dict, user_dict, env):
|
|
- self.account_id = user_dict["link"]
|
|
|
|
|
|
+ self.account_id = user_dict["link"].split("/")[-1]
|
|
self.platform = platform
|
|
self.platform = platform
|
|
self.mode = mode
|
|
self.mode = mode
|
|
self.rule_dict = rule_dict
|
|
self.rule_dict = rule_dict
|
|
@@ -134,7 +134,6 @@ class HaoKanVideoAccount(object):
|
|
item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
|
|
item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
|
|
# 准备发往 MQ 的消息
|
|
# 准备发往 MQ 的消息
|
|
mq_obj = item.produce_item()
|
|
mq_obj = item.produce_item()
|
|
- print(item.item)
|
|
|
|
# 筛选规则的 pipeline
|
|
# 筛选规则的 pipeline
|
|
pipeline = PiaoQuanPipelineTest(
|
|
pipeline = PiaoQuanPipelineTest(
|
|
platform=self.platform,
|
|
platform=self.platform,
|
|
@@ -145,11 +144,11 @@ class HaoKanVideoAccount(object):
|
|
trace_id=trace_id,
|
|
trace_id=trace_id,
|
|
)
|
|
)
|
|
if pipeline.process_item():
|
|
if pipeline.process_item():
|
|
- self.mq.send_msg(mq_obj)
|
|
|
|
- print(mq_obj)
|
|
|
|
|
|
+ # self.mq.send_msg(mq_obj)
|
|
|
|
+ print(json.dumps(mq_obj))
|
|
print("成功发送至 ETL")
|
|
print("成功发送至 ETL")
|
|
|
|
|
|
- def schedule_code(self):
|
|
|
|
|
|
+ def schedule(self):
|
|
"""
|
|
"""
|
|
small: 只抓取小视频
|
|
small: 只抓取小视频
|
|
big: 只抓取视频
|
|
big: 只抓取视频
|
|
@@ -167,18 +166,15 @@ class HaoKanVideoAccount(object):
|
|
self.get_video_list()
|
|
self.get_video_list()
|
|
|
|
|
|
|
|
|
|
-
|
|
|
|
if __name__ == "__main__":
|
|
if __name__ == "__main__":
|
|
select_user_sql = f"""select * from crawler_user_v3 where source = 'haokanshipin';"""
|
|
select_user_sql = f"""select * from crawler_user_v3 where source = 'haokanshipin';"""
|
|
user_list = MysqlHelper.get_values("author", "haokanshipin", select_user_sql, "prod", action="")
|
|
user_list = MysqlHelper.get_values("author", "haokanshipin", select_user_sql, "prod", action="")
|
|
print(json.dumps(user_list[0], ensure_ascii=False, indent=4))
|
|
print(json.dumps(user_list[0], ensure_ascii=False, indent=4))
|
|
- print(user_list[0]['link'])
|
|
|
|
-
|
|
|
|
- # T = HaoKanVideoAccount(
|
|
|
|
- # platform="haokanshipin",
|
|
|
|
- # mode="author",
|
|
|
|
- # rule_dict={},
|
|
|
|
- # user_dict={"link": 1657075178605219},
|
|
|
|
- # env="prod",
|
|
|
|
- # )
|
|
|
|
- # T.get_tiny_video_list()
|
|
|
|
|
|
+ T = HaoKanVideoAccount(
|
|
|
|
+ platform="haokanshipin",
|
|
|
|
+ mode="author",
|
|
|
|
+ rule_dict={},
|
|
|
|
+ user_dict=user_list[0],
|
|
|
|
+ env="prod",
|
|
|
|
+ )
|
|
|
|
+ T.schedule()
|