Browse Source

1. 优化了视频号的代码

罗俊辉 1 year ago
parent
commit
928eeea65f

+ 2 - 2
shipinhao/shipinhao_author/shipinhao_author_test.py

@@ -4,8 +4,8 @@ import requests
 
 class SphAuthor:
     def __init__(self, name):
-        self.token = "1678001807"
-        self.cookie = 'ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; uuid=c2c0f943291da7eff8f6972740e4f894; _clck=3930572231|1|fgk|0; rand_info=CAESILaJJXli7mbr458BaiPXyXMbWTe3TKCzsfFmsXaTOPcU; slave_bizuin=3524986952; data_bizuin=3524986952; bizuin=3524986952; data_ticket=2/3sHr4KYg12+LGHQV6k5K3pJ6S8S4nAYWhKsIhhij/OcGFjUysTrpY75aUZy9M9; slave_sid=a2JpR21wSF9xRmNLU1V1Ylh5U0Ywd1o4MUdyZ3FVZXhfNGUyWXc3dURfbWlpdjFUcUl5elp0RURpWERwTktBb3VDenFsR2hxbHZ2cnRBdjZJSk9oMnRtSV83azFWOG9WbDd5U0h6Q1RkajhxY21CTmxzSFlYZDVjaUhteWozbzRFS3luRzNUUExzVmxkRzhG; slave_user=gh_0d8cf8319a3b; xid=84a3765ddefaf98f144be8b1aafa0d58; _clsk=1xoy7wc|1699512584249|6|1|mp.weixin.qq.com/weheat-agent/payload/record; bizuin=3524986952; data_bizuin=3524986952; data_ticket=2/3sHr4KYg12+LGHQV6k5K3pJ6S8S4nAYWhKsIhhij/OcGFjUysTrpY75aUZy9M9; rand_info=CAESILaJJXli7mbr458BaiPXyXMbWTe3TKCzsfFmsXaTOPcU; slave_bizuin=3524986952; slave_sid=a2JpR21wSF9xRmNLU1V1Ylh5U0Ywd1o4MUdyZ3FVZXhfNGUyWXc3dURfbWlpdjFUcUl5elp0RURpWERwTktBb3VDenFsR2hxbHZ2cnRBdjZJSk9oMnRtSV83azFWOG9WbDd5U0h6Q1RkajhxY21CTmxzSFlYZDVjaUhteWozbzRFS3luRzNUUExzVmxkRzhG; slave_user=gh_0d8cf8319a3b'
+        self.token = "2080949641"
+        self.cookie = "ua_id=bw4VuFJr6fAuSkwdAAAAAClaW0m9Aua-6IfHaXU_zpo=; wxuin=95302180931488; mm_lang=zh_CN; RK=kreEMgtMMJ; ptcz=8fd1b267c98a1185bbe6455a081f1264048ee388363ca305d9ef4812892c7900; qq_domain_video_guid_verify=2ba78a5010233582; poc_sid=HOinP2Wj322Ex737kV651Zqy6y8fSprOUUvaegBg; _qimei_q36=; _qimei_h38=9eea33ea92afe8a922333fce03000001317916; pgv_pvid=9056371236; _clck=3930572231|1|fgk|0; uuid=6562bbd8859230ce4120dfa063c76997; rand_info=CAESIGAatjSIjvxVJVDxRDN7F/CNFWMifvAVqje98rd++8UY; slave_bizuin=3236647229; data_bizuin=3236647229; bizuin=3236647229; data_ticket=qm3i6jRhObs1yKHttGh0gVI02Mz7FTPfatn0RMLdaWyD7Ukcokm5Dc3mmYLQUZPg; slave_sid=UWxjZnhBREZRRTNKZ3dYZTlYRE9Db2lxQUhOM3lZUlRoMkV0MG1wdVVudGpQTWxnVkxzYW5pV2c3NjB3bnAyQ2lPaXBBVVRPazEybWtKSVEzTnUyazZ6WEJsdnFaWWVDaUFrM3pTTXRkeUNJS3RNVTc2NFRBWkZiVGQzYllacEFRalBBZ2tXZlltblJYS2VS; slave_user=gh_d284c09295eb; xid=cb96e6ba4b4960d74a22869b1bb21406; _clsk=z77guf|1699532621466|4|1|mp.weixin.qq.com/weheat-agent/payload/record"
         self.name = name
 
     def get_user_id(self):

+ 78 - 38
shipinhao/shipinhao_author/shipinhao_scheduling.py

@@ -3,22 +3,9 @@ from common.aliyun_log import AliyunLogger
 from common.db import MysqlHelper
 
 
-def get_history_id(name):
-    """
-    从数据库表中读取 id
-    """
-    # select_user_sql = f"""select * from crawler_user_v3 where ={task_id}"""
-    # user_list = MysqlHelper.get_values(log_type, crawler, select_user_sql, env, action="")
-    name_id_dict = {}
-    if name_id_dict.get(name):
-        return name_id_dict[name]
-    else:
-        return False
-
-
 def find_target_user(name, user_list):
     for obj in user_list:
-        if obj['name'] == name:
+        if obj['nickname'] == name:
             return obj
         else:
             continue
@@ -26,14 +13,29 @@ def find_target_user(name, user_list):
 
 
 class ShiPinHaoAccount:
-    def __init__(self, token, cookie, account_name):
+    def __init__(self, token, cookie, account_name, platform, mode, rule_dict, env):
+        """
+        Store the request credentials and crawl settings for one account.
+
+        :param token: value sent as the `token` request parameter.
+        :param cookie: value sent as the `cookie` request header.
+        :param account_name: account name used in the `accounts` table lookup
+            and matched against the account search results.
+        :param platform: platform name; interpolated into the SQL statements
+            and passed to MysqlHelper as `crawler`.
+        :param mode: passed to MysqlHelper as `log_type`.
+        :param rule_dict: stored as-is; not read by any code visible here.
+        :param env: passed to MysqlHelper as `env`.
+        """
         self.token = token
         self.cookie = cookie
         self.account_name = account_name
+        self.platform = platform
+        self.mode = mode
+        self.rule_dict = rule_dict
+        self.env = env
+
+    def get_history_id(self):
+        """
+        Look up a previously stored id for this account in the `accounts` table.
+
+        Returns the first element of whatever MysqlHelper.get_values hands back
+        for this account name / platform pair (restricted to useful = 1), or
+        False when that result is falsy.
+        """
+        # NOTE(review): the statement is assembled by string interpolation, so a
+        # double quote inside account_name or platform would alter the SQL —
+        # confirm whether MysqlHelper supports parameter binding.
+        query = f"""select name_id from accounts where name = "{self.account_name}" and platform = "{self.platform}" and useful = 1 limit 1"""
+        rows = MysqlHelper.get_values(log_type=self.mode, crawler=self.platform, sql=query, env=self.env, machine="")
+        # NOTE(review): rows[0] is presumably a result row rather than the bare
+        # name_id value — verify against MysqlHelper.get_values.
+        return rows[0] if rows else False
 
     def get_account_id(self):
         # 读历史数据,如果存在 id,则直接返回 id
-        history_id = get_history_id(self.account_name)
+        history_id = self.get_history_id()
         if history_id:
             return history_id
         else:
@@ -62,38 +64,76 @@ class ShiPinHaoAccount:
             response = requests.request("GET", url, headers=headers, params=params)
             user_list = response.json()['acct_list']
             target_user = find_target_user(name=self.account_name, user_list=user_list)
-            return target_user
+            # 写入 MySql 数据库
+            if target_user:
+                update_sql = f"""INSERT INTO accounts (name, name_id, platform) values ("{self.account_name}", "{target_user['username']}", "{self.platform}")"""
+                print(update_sql)
+                MysqlHelper.update_values(log_type=self.mode, crawler=self.platform, sql=update_sql, env=self.env, machine="")
+                return target_user['username']
+            else:
+                return False
 
     def get_account_videos(self):
-        user_info = self.get_account_id()
-        if user_info:
+        """
+        Page through the account's video feed and print each video description.
+
+        Resolves the account id with get_account_id(), then repeatedly calls the
+        `get_feed_list` action of the videosnap endpoint, feeding the
+        `last_buff` value of each response back as the `buffer` of the next
+        request. Paging stops on an empty page or when the cursor is empty or
+        unchanged. Prints a notice when no account id is found.
+
+        :raises KeyError: if a response lacks 'last_buff' or 'list', or a feed
+            entry lacks 'desc'.
+        """
+        user_id = self.get_account_id()
+        buffer = ""
+        if user_id:
             url = "https://mp.weixin.qq.com/cgi-bin/videosnap"
-            params = {
-                "action": "get_feed_list",
-                "username": user_info['username'],
-                "buffer": "",
-                "count": "15",
-                "scene": "1",
-                "token": self.token,
-                "lang": "zh_CN",
-                "f": "json",
-                "ajax": "1"
-            }
             headers = {
                 'authority': 'mp.weixin.qq.com',
                 'accept': '*/*',
                 'accept-language': 'en,zh-CN;q=0.9,zh;q=0.8',
                 'cookie': self.cookie,
-                'referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN'.format(self.token),
+                'referer': 'https://mp.weixin.qq.com/cgi-bin/appmsg?t=media/appmsg_edit_v2&action=edit&isNew=1&type=77&createType=0&token={}&lang=zh_CN'.format(
+                    self.token),
                 'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36',
                 'x-requested-with': 'XMLHttpRequest'
             }
-
-            response = requests.request("GET", url, headers=headers, params=params)
-            video_list = response.json()
-            # print(json.dumps(video_list, ensure_ascii=False, indent=4))
-            # print(len(video_list['list']))
-            for obj in video_list['list']:
-                print(obj['desc'])
+            while True:
+                params = {
+                    "action": "get_feed_list",
+                    "username": user_id,
+                    "buffer": buffer,
+                    "count": "15",
+                    "scene": "1",
+                    "token": self.token,
+                    "lang": "zh_CN",
+                    "f": "json",
+                    "ajax": "1"
+                }
+                response = requests.request("GET", url, headers=headers, params=params)
+                video_list = response.json()
+                next_buffer = video_list['last_buff']
+                feeds = video_list['list']
+                for obj in feeds:
+                    print(obj['desc'])
+                # Stop on an empty page or an empty/unchanged cursor; without
+                # this check the loop would re-request the feed forever.
+                if not feeds or not next_buffer or next_buffer == buffer:
+                    break
+                buffer = next_buffer
         else:
             print("Did not find any user info")
+
+    def process_video_obj(self, video_obj):
+        """
+        Extract the id, title, URLs and dimensions from one raw feed entry.
+
+        :param video_obj: dict with 'nonce_id', 'desc', 'head_url' and a nested
+            'media' dict holding 'cover_url', 'video_url', 'width', 'height'.
+        :return: dict keyed by video_id, video_title, cover_url, video_url,
+            avatar_url, width and height.
+        :raises KeyError: if any of those keys is missing from video_obj.
+        """
+        video_dict = {
+            "video_id": video_obj['nonce_id'],
+            "video_title": video_obj['desc'],
+            "cover_url": video_obj['media']["cover_url"],
+            "video_url": video_obj['media']['video_url'],
+            "avatar_url": video_obj['head_url'],
+            "width": video_obj['media']['width'],
+            "height": video_obj['media']['height']
+        }
+        print(self.platform)
+        # Hand the extracted fields back; previously the dict was built and
+        # then discarded, so callers received None.
+        return video_dict
+
+
+if __name__ == "__main__":
+    import os
+
+    # Manual debug run: crawl one account and print its video descriptions.
+    # The token and cookie are sent to mp.weixin.qq.com as the request token
+    # and cookie header, so they are read from the environment instead of
+    # being committed to the repository. A KeyError here means
+    # SHIPINHAO_TOKEN / SHIPINHAO_COOKIE is not set.
+    temp_token = os.environ["SHIPINHAO_TOKEN"]
+    temp_cookie = os.environ["SHIPINHAO_COOKIE"]
+    SP = ShiPinHaoAccount(
+        token=temp_token,
+        cookie=temp_cookie,
+        account_name="心煤",
+        platform="shipinhao",
+        mode="author",
+        rule_dict={},
+        env="prod"
+    )
+    SP.get_account_videos()
+