Browse Source

Merge remote-tracking branch 'origin/master'

zhangyong 1 year ago
parent
commit
0fe343566b

+ 33 - 19
shipinhao/shipinhao_author/shipinhao_author_test.py

@@ -8,6 +8,7 @@ import requests
 
 sys.path.append(os.getcwd())
 from common.pipeline import PiaoQuanPipelineTest
+from common.mq import MQ
 from common.db import MysqlHelper
 
 
@@ -32,21 +33,27 @@ class ShiPinHaoAccount:
         self.env = env
         self.download_cnt = 0
         self.token_count = 0
+        self.mq = MQ(topic_name="topic_crawler_etl_" + self.env)
 
     def get_token_from_mysql(self):
-        select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
-        # print(select_sql)
-        configs = MysqlHelper.get_values(
-            log_type=self.mode,
-            crawler=self.platform,
-            sql=select_sql,
-            env=self.env,
-            machine="",
-        )
-        token_config = configs[0][0]
-        token_info = json.loads(token_config)
-        self.token = token_info["token"]
-        self.cookie = token_info["cookie"]
+        # select_sql = f"""SELECT config from crawler_config where source = '{ self.platform }'; """
+        # # print(select_sql)
+        # configs = MysqlHelper.get_values(
+        #     log_type=self.mode,
+        #     crawler=self.platform,
+        #     sql=select_sql,
+        #     env=self.env,
+        #     machine="",
+        # )
+        # print(configs)
+        # token_config = configs[0][0]
+        # token_info = json.loads(token_config)
+        # self.token = token_info["token"]
+        # self.cookie = token_info["cookie"]
+        self.token = "766484754"
+        self.cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3524986952|1|fgp|0; uuid=a76c16bf749aaf6418aa610ad5c6e66c; rand_info=CAESIDhWIfyhucI9xQkQm/2xYzaHtaGjRUbHeNKgSt4b382C; slave_bizuin=3930572231; data_bizuin=3930572231; bizuin=3930572231; data_ticket=k3o3TmbxDq450TMRpBL2zW+f1onbHFg7G4/9iLi/jlp1zyWQtmpjxFouT+/kRE1e; slave_sid=TndTREg5TW9MaFUxRllkaVFacXh6bVhFSEhpSEVRNUc2RWtBbnJRZmdxZzNxaUpOc29oRGJ1RjhFZm9jNXZ3Q1JzUzN3elFDYlVjZTEyN1YyWm9nOGhsUW9sNTFEUEtDRmo1Z0hzZjA1ZjhibXg0YzVrOE91N3ZOZWVqT3UxT0FSN3lsNG9SNTNNdEE2VWNC; slave_user=gh_deef7ad59a83; xid=9bd5b038d83164cbfa24bcf224bc9172; _clsk=bqf6jh|1699929305392|6|1|mp.weixin.qq.com/weheat-agent/payload/record"
+        print(self.token)
+        print(self.cookie)
 
     def get_history_id(self):
         """
@@ -60,6 +67,7 @@ class ShiPinHaoAccount:
             env=self.env,
             machine="",
         )
+        print(name_id)
         if name_id:
             return name_id[0]
         else:
@@ -96,7 +104,9 @@ class ShiPinHaoAccount:
             }
             response = requests.request("GET", url, headers=headers, params=params)
             self.token_count += 1
-            user_list = response.json()["acct_list"]
+            user_list = response.json()
+            print(user_list)
+            user_list = user_list["acct_list"]
             target_user = find_target_user(name=self.account_name, user_list=user_list)
             # 写入 MySql 数据库
             if target_user:
@@ -117,6 +127,8 @@ class ShiPinHaoAccount:
         # 一个账号最多抓 30 条数据
         self.get_token_from_mysql()
         user_id = self.get_account_id()
+        print("ljh", user_id)
+        print(type(user_id))
         if user_id:
             url = "https://mp.weixin.qq.com/cgi-bin/videosnap"
             headers = {
@@ -141,6 +153,7 @@ class ShiPinHaoAccount:
                     "count": "15",
                     "scene": "1",
                     "token": self.token,
+                    # "token": "123456",
                     "lang": "zh_CN",
                     "f": "json",
                     "ajax": "1",
@@ -161,7 +174,7 @@ class ShiPinHaoAccount:
                     )
                     time.sleep(60 * 15)
                     continue
-                if not res_json["list"]:
+                if not res_json.get("list"):
                     print("没有更多视频了")
                     return
                 else:
@@ -225,19 +238,20 @@ class ShiPinHaoAccount:
             video_dict["publish_time"] = video_dict["publish_time_str"]
             print(video_dict)
             print("成功发送 MQ 至 ETL")
+            self.mq.send_msg(video_dict)
             self.download_cnt += 1
         return True
 
 
 if __name__ == "__main__":
-    temp_token = "2080949641"
-    temp_cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3930572231|1|fgk|0; uuid=6562bbd8859230ce4120dfa063c76997; rand_info=CAESIGAatjSIjvxVJVDxRDN7F/CNFWMifvAVqje98rd++8UY; slave_bizuin=3236647229; data_bizuin=3236647229; bizuin=3236647229; data_ticket=qm3i6jRhObs1yKHttGh0gVI02Mz7FTPfatn0RMLdaWyD7Ukcokm5Dc3mmYLQUZPg; slave_sid=UWxjZnhBREZRRTNKZ3dYZTlYRE9Db2lxQUhOM3lZUlRoMkV0MG1wdVVudGpQTWxnVkxzYW5pV2c3NjB3bnAyQ2lPaXBBVVRPazEybWtKSVEzTnUyazZ6WEJsdnFaWWVDaUFrM3pTTXRkeUNJS3RNVTc2NFRBWkZiVGQzYllacEFRalBBZ2tXZlltblJYS2VS; slave_user=gh_d284c09295eb; xid=cb96e6ba4b4960d74a22869b1bb21406; _clsk=z77guf|1699532621466|4|1|mp.weixin.qq.com/weheat-agent/payload/record"
+    # temp_token = "2080949641"
+    # temp_cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3930572231|1|fgk|0; uuid=6562bbd8859230ce4120dfa063c76997; rand_info=CAESIGAatjSIjvxVJVDxRDN7F/CNFWMifvAVqje98rd++8UY; slave_bizuin=3236647229; data_bizuin=3236647229; bizuin=3236647229; data_ticket=qm3i6jRhObs1yKHttGh0gVI02Mz7FTPfatn0RMLdaWyD7Ukcokm5Dc3mmYLQUZPg; slave_sid=UWxjZnhBREZRRTNKZ3dYZTlYRE9Db2lxQUhOM3lZUlRoMkV0MG1wdVVudGpQTWxnVkxzYW5pV2c3NjB3bnAyQ2lPaXBBVVRPazEybWtKSVEzTnUyazZ6WEJsdnFaWWVDaUFrM3pTTXRkeUNJS3RNVTc2NFRBWkZiVGQzYllacEFRalBBZ2tXZlltblJYS2VS; slave_user=gh_d284c09295eb; xid=cb96e6ba4b4960d74a22869b1bb21406; _clsk=z77guf|1699532621466|4|1|mp.weixin.qq.com/weheat-agent/payload/record"
     SP = ShiPinHaoAccount(
         platform="shipinhao",
         mode="author",
-        user_dict={"uid": "123456", "link": "心煤", "user_id": "1234565"},
+        user_dict={"uid": "123456", "link": "树树读书1014", "user_id": "1234565"},
         rule_dict={},
-        env="prod",
+        env="dev",
     )
 
     SP.get_account_videos()

+ 18 - 3
shipinhao/shipinhao_author/shipinhao_scheduling.py

@@ -13,6 +13,7 @@ from common import PiaoQuanPipeline, AliyunLogger
 from common.feishu import Feishu
 from common.db import MysqlHelper
 from common.mq import MQ
+from common.public import clean_title
 
 
 def find_target_user(name, user_list):
@@ -67,7 +68,7 @@ class ShiPinHaoAccount:
             machine="",
         )
         if name_id:
-            return name_id[0]
+            return name_id[0][0]
         else:
             return False
 
@@ -101,11 +102,25 @@ class ShiPinHaoAccount:
                 "x-requested-with": "XMLHttpRequest",
             }
             response = requests.request("GET", url, headers=headers, params=params)
+
+            # try:
+            #     user_list = response.json()["acct_list"]
+            # except:
+            #     if 20 >= datetime.datetime.now().hour >= 10:
+            #         Feishu.bot(
+            #             log_type=self.mode,
+            #             crawler=self.platform,
+            #             text="视频号Token 过期啦"
+            #             # text=f"{token_dict['title']}\n操作人:{token_dict['operator']}\n更换日期:{token_dict['update_time']} \n频控啦,请扫码更换其他公众号token\nhttps://mp.weixin.qq.com/"
+            #         )
+            #     # time.sleep(60 * 15)
+            #     # continue
+            #     return
             user_list = response.json()["acct_list"]
             target_user = find_target_user(name=self.account_name, user_list=user_list)
             # 写入 MySql 数据库
             if target_user:
-                update_sql = f"""INSERT INTO accounts (name, name_id, platform) values ("{self.account_name}", "{target_user['username']}", "{self.platform}")"""
+                update_sql = f"""INSERT INTO accounts (name, name_id, platform, useful) values ("{self.account_name}", "{target_user['username']}", "{self.platform}", 1 )"""
                 # print(update_sql)
                 MysqlHelper.update_values(
                     log_type=self.mode,
@@ -235,7 +250,7 @@ class ShiPinHaoAccount:
         trace_id = self.platform + str(uuid.uuid1())
         video_dict = {
             "video_id": video_obj["nonce_id"],
-            "video_title": video_obj["desc"],
+            "video_title": clean_title(video_obj["desc"].split("\n")[0].split("#")[0]),
             "out_video_id": video_obj["nonce_id"],
             "publish_time_stamp": int(time.time()),
             "publish_time_str": time.strftime(