Parcourir la source

修改测试版本的 pipeline, 和好看视频测试代码;

罗俊辉 il y a 1 an
Parent
commit
f1eb2afc7c
2 fichiers modifiés avec 13 ajouts et 16 suppressions
  1. 1 0
      common/pipeline.py
  2. 12 16
      haokanshipin/haokanshipin_author/hksp_test.py

+ 1 - 0
common/pipeline.py

@@ -237,6 +237,7 @@ class PiaoQuanPipelineTest:
         )
         if repeat_video:
             message = "重复的视频"
+            print(message)
             return False
         return True
 

+ 12 - 16
haokanshipin/haokanshipin_author/hksp_test.py

@@ -18,7 +18,7 @@ from common.scheduling_db import MysqlHelper
 
 class HaoKanVideoAccount(object):
     def __init__(self, platform, mode, rule_dict, user_dict, env):
-        self.account_id = user_dict["link"]
+        self.account_id = user_dict["link"].split("/")[-1]
         self.platform = platform
         self.mode = mode
         self.rule_dict = rule_dict
@@ -134,7 +134,6 @@ class HaoKanVideoAccount(object):
         item.add_video_info("session", "{}-{}".format(self.platform, int(time.time())))
         # 准备发往 MQ 的消息
         mq_obj = item.produce_item()
-        print(item.item)
         # 筛选规则的 pipeline
         pipeline = PiaoQuanPipelineTest(
             platform=self.platform,
@@ -145,11 +144,11 @@ class HaoKanVideoAccount(object):
             trace_id=trace_id,
         )
         if pipeline.process_item():
-            self.mq.send_msg(mq_obj)
-            print(mq_obj)
+            # self.mq.send_msg(mq_obj)
+            print(json.dumps(mq_obj))
             print("成功发送至 ETL")
 
-    def schedule_code(self):
+    def schedule(self):
         """
         small: 只抓取小视频
         big: 只抓取视频
@@ -167,18 +166,15 @@ class HaoKanVideoAccount(object):
                 self.get_video_list()
 
 
-
 if __name__ == "__main__":
     select_user_sql = f"""select * from crawler_user_v3 where source = 'haokanshipin';"""
     user_list = MysqlHelper.get_values("author", "haokanshipin", select_user_sql, "prod", action="")
     print(json.dumps(user_list[0], ensure_ascii=False, indent=4))
-    print(user_list[0]['link'])
-
-    # T = HaoKanVideoAccount(
-    #     platform="haokanshipin",
-    #     mode="author",
-    #     rule_dict={},
-    #     user_dict={"link": 1657075178605219},
-    #     env="prod",
-    # )
-    # T.get_tiny_video_list()
+    T = HaoKanVideoAccount(
+        platform="haokanshipin",
+        mode="author",
+        rule_dict={},
+        user_dict=user_list[0],
+        env="prod",
+    )
+    T.schedule()