فهرست منبع

视频号——title_bugfix

罗俊辉 1 سال پیش
والد
کامیت
615d060d36
2 فایل تغییر یافته به همراه 24 افزوده شده و 17 حذف شده
  1. 22 16
      shipinhao/shipinhao_author/shipinhao_author_test.py
  2. 2 1
      shipinhao/shipinhao_author/shipinhao_scheduling.py

+ 22 - 16
shipinhao/shipinhao_author/shipinhao_author_test.py

@@ -8,6 +8,7 @@ import requests
 
 sys.path.append(os.getcwd())
 from common.pipeline import PiaoQuanPipelineTest
+from common.mq import MQ
 from common.db import MysqlHelper
 
 
@@ -32,21 +33,25 @@ class ShiPinHaoAccount:
         self.env = env
         self.download_cnt = 0
         self.token_count = 0
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
 
     def get_token_from_mysql(self):
-        select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
-        # print(select_sql)
-        configs = MysqlHelper.get_values(
-            log_type=self.mode,
-            crawler=self.platform,
-            sql=select_sql,
-            env=self.env,
-            machine="",
-        )
-        token_config = configs[0][0]
-        token_info = json.loads(token_config)
-        self.token = token_info["token"]
-        self.cookie = token_info["cookie"]
+        # select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
+        # # print(select_sql)
+        # configs = MysqlHelper.get_values(
+        #     log_type=self.mode,
+        #     crawler=self.platform,
+        #     sql=select_sql,
+        #     env=self.env,
+        #     machine="",
+        # )
+        # print(configs)
+        # token_config = configs[0][0]
+        # token_info = json.loads(token_config)
+        # self.token = token_info["token"]
+        # self.cookie = token_info["cookie"]
+        self.token = "766484754"
+        self.cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3524986952|1|fgp|0; uuid=a76c16bf749aaf6418aa610ad5c6e66c; rand_info=CAESIDhWIfyhucI9xQkQm/2xYzaHtaGjRUbHeNKgSt4b382C; slave_bizuin=3930572231; data_bizuin=3930572231; bizuin=3930572231; data_ticket=k3o3TmbxDq450TMRpBL2zW+f1onbHFg7G4/9iLi/jlp1zyWQtmpjxFouT+/kRE1e; slave_sid=TndTREg5TW9MaFUxRllkaVFacXh6bVhFSEhpSEVRNUc2RWtBbnJRZmdxZzNxaUpOc29oRGJ1RjhFZm9jNXZ3Q1JzUzN3elFDYlVjZTEyN1YyWm9nOGhsUW9sNTFEUEtDRmo1Z0hzZjA1ZjhibXg0YzVrOE91N3ZOZWVqT3UxT0FSN3lsNG9SNTNNdEE2VWNC; slave_user=gh_deef7ad59a83; xid=9bd5b038d83164cbfa24bcf224bc9172; _clsk=bqf6jh|1699929305392|6|1|mp.weixin.qq.com/weheat-agent/payload/record"
         print(self.token)
         print(self.cookie)
 
@@ -62,7 +67,7 @@ class ShiPinHaoAccount:
             env=self.env,
             machine="",
         )
-        print(name_id[0])
+        print(name_id)
         if name_id:
             return name_id[0]
         else:
@@ -233,6 +238,7 @@ class ShiPinHaoAccount:
             video_dict["publish_time"] = video_dict["publish_time_str"]
             print(video_dict)
             print("成功发送 MQ 至 ETL")
+            self.mq.send_msg(video_dict)
             self.download_cnt += 1
         return True
 
@@ -243,9 +249,9 @@ if __name__ == "__main__":
     SP = ShiPinHaoAccount(
         platform="shipinhao",
         mode="author",
-        user_dict={"uid": "123456", "link": "心煤", "user_id": "1234565"},
+        user_dict={"uid": "123456", "link": "树树读书1014", "user_id": "1234565"},
         rule_dict={},
-        env="prod",
+        env="dev",
     )
 
     SP.get_account_videos()

+ 2 - 1
shipinhao/shipinhao_author/shipinhao_scheduling.py

@@ -13,6 +13,7 @@ from common import PiaoQuanPipeline, AliyunLogger
 from common.feishu import Feishu
 from common.db import MysqlHelper
 from common.mq import MQ
+from common.public import clean_title
 
 
 def find_target_user(name, user_list):
@@ -249,7 +250,7 @@ class ShiPinHaoAccount:
         trace_id = self.platform + str(uuid.uuid1())
         video_dict = {
             "video_id": video_obj["nonce_id"],
-            "video_title": video_obj["desc"],
+            "video_title": clean_title(video_obj["desc"].split("\n")[0].split("#")[0]),
             "out_video_id": video_obj["nonce_id"],
             "publish_time_stamp": int(time.time()),
             "publish_time_str": time.strftime(