wangkun 1 year ago
parent
commit
dd323ee688

+ 32 - 0
xiaoniangao/xiaoniangao_author/author_test.py

@@ -0,0 +1,32 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/19
+import json
+
+import requests
+
+url = "https://kapi-xng-app.xiaoniangao.cn/v1/album/user_public"  # endpoint this script POSTs to
+next_t = -1  # NOTE(review): assigned but never read in this script; the payload's start_t keeps its template value of -1
+visited_mid = 297080453  # value written into payload_dic['visited_mid'] below
+payload = "{\"share_width\":300,\"log\":{\"brand\":\"iPad\",\"net\":\"wifi\",\"resolution\":\"750*1334\",\"uid\":\"2F310D09-5E32-5985-8644-3BCB6920E76F\",\"app_version\":\"1.22.5\",\"channel\":\"ios_app_store\",\"page\":\"\",\"product\":\"xng\",\"os_version\":\"15.7\",\"pf\":\"4\",\"session_id\":\"47D7817B-AAB1-4E70-BA7F-B868FC9AA21F\",\"idfa\":\"\",\"device\":\"iPad Pro (12.9-inch) (3rd generation)\"},\"qs\":\"imageMogr2\\/gravity\\/center\\/rotate\\/$\\/thumbnail\\/!690x385r\\/interlace\\/1\\/format\\/jpg\",\"share_height\":240,\"start_t\":-1,\"token\":\"\",\"visited_mid\":211201301,\"limit\":20}"
+payload_dic = json.loads(payload)  # parse the JSON template so a single field can be overridden
+payload_dic['visited_mid'] = visited_mid  # replaces the template's visited_mid (211201301)
+payload_new = json.dumps(payload_dic)  # re-serialized body that is actually sent
+headers = {  # fixed header values; nothing here is derived from payload_new or the current time
+  'Host': 'kapi-xng-app.xiaoniangao.cn',
+  'content-type': 'application/json; charset=utf-8',
+  'accept': '*/*',
+  'authorization': 'hSNQ2s9pvPxvFn4LaQJxKQ6/7Is=',  # NOTE(review): hard-coded credential-like value committed in source
+  'verb': 'POST',
+  'content-md5': 'c7b7f8663984e8800e3bcd9b44465083',  # NOTE(review): hard-coded; presumably a digest of some captured body - confirm it is still valid for payload_new
+  'x-b3-traceid': '2f9da41f960ae077',
+  'accept-language': 'zh-cn',
+  'date': 'Mon, 19 Jun 2023 06:41:17 GMT',  # hard-coded timestamp, not the time of the request
+  'x-token-id': '',
+  'x-signaturemethod': 'hmac-sha1',
+  'user-agent': 'xngapp/157 CFNetwork/1335.0.3.1 Darwin/21.6.0'
+}
+
+response = requests.post(url, headers=headers, data=payload_new)  # body sent as the pre-serialized JSON string; no timeout argument is passed
+
+print(response.text)  # print the raw response body

+ 25 - 48
xiaoniangao/xiaoniangao_author/xiaoniangao_author_scheduling.py

@@ -31,54 +31,31 @@ class XiaoniangaoAuthorScheduling:
     # Fetch the videos listed on a user's profile page
     @classmethod
     def get_videoList(cls, log_type, crawler, rule_dict, user_dict, env):
-        next_t = None
+        next_t = -1
         while True:
-            url = "https://api.xiaoniangao.cn/profile/list_album"
+            url = "https://kapi-xng-app.xiaoniangao.cn/v1/album/user_public"
+            payload = "{\"share_width\":300,\"log\":{\"brand\":\"iPad\",\"net\":\"wifi\",\"resolution\":\"750*1334\",\"uid\":\"2F310D09-5E32-5985-8644-3BCB6920E76F\",\"app_version\":\"1.22.5\",\"channel\":\"ios_app_store\",\"page\":\"\",\"product\":\"xng\",\"os_version\":\"15.7\",\"pf\":\"4\",\"session_id\":\"47D7817B-AAB1-4E70-BA7F-B868FC9AA21F\",\"idfa\":\"\",\"device\":\"iPad Pro (12.9-inch) (3rd generation)\"},\"qs\":\"imageMogr2\\/gravity\\/center\\/rotate\\/$\\/thumbnail\\/!690x385r\\/interlace\\/1\\/format\\/jpg\",\"share_height\":240,\"start_t\":-1,\"token\":\"\",\"visited_mid\":211201301,\"limit\":20}"
+            payload_dic = json.loads(payload)
+            payload_dic['start_t'] = next_t
+            payload_dic['visited_mid'] = int(user_dict['link'])
+            payload_new = json.dumps(payload_dic)
             headers = {
-                "X-Mid": '1fb47aa7a860d9',
-                "X-Token-Id": '9f2cb91f9952c107ecb73642083e1dec-1145266232',
-                "content-type": "application/json",
-                "uuid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
-                "Accept-Encoding": "gzip,compress,br,deflate",
-                "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_7_1 like Mac OS X)"
-                              " AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 "
-                              "MicroMessenger/8.0.20(0x18001435) NetType/WIFI Language/zh_CN",
-                "Referer": 'https://servicewechat.com/wxd7911e4c177690e4/654/page-frame.html'
-            }
-            json_text = {
-                "visited_mid": str(user_dict['link']),
-                "start_t": next_t,
-                "qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!690x385r/crop/690x385/interlace/1/format/jpg",
-                "h_qs": "imageMogr2/gravity/center/rotate/$/thumbnail/!120x120r/crop/120x120/interlace/1/format/jpg",
-                "limit": 20,
-                "token": '54e4c603f7bf3dc009c86b49ed91be36',
-                "uid": 'f40c2e7c-3cfb-4804-b513-608c0280268c',
-                "proj": "ma",
-                "wx_ver": "8.0.23",
-                "code_ver": "3.68.0",
-                "log_common_params": {
-                    "e": [{
-                        "data": {
-                            "page": "profilePage",
-                            "topic": "public"
-                        }
-                    }],
-                    "ext": {
-                        "brand": "iPhone",
-                        "device": "iPhone 11",
-                        "os": "iOS 14.7.1",
-                        "weixinver": "8.0.23",
-                        "srcver": "2.24.7",
-                        "net": "wifi",
-                        "scene": "1089"
-                    },
-                    "pj": "1",
-                    "pf": "2",
-                    "session_id": "7468cf52-00ea-432e-8505-6ea3ad7ec164"
-                }
+                'Host': 'kapi-xng-app.xiaoniangao.cn',
+                'content-type': 'application/json; charset=utf-8',
+                'accept': '*/*',
+                'authorization': 'hSNQ2s9pvPxvFn4LaQJxKQ6/7Is=',
+                'verb': 'POST',
+                'content-md5': 'c7b7f8663984e8800e3bcd9b44465083',
+                'x-b3-traceid': '2f9da41f960ae077',
+                'accept-language': 'zh-cn',
+                'date': 'Mon, 19 Jun 2023 06:41:17 GMT',
+                'x-token-id': '',
+                'x-signaturemethod': 'hmac-sha1',
+                'user-agent': 'xngapp/157 CFNetwork/1335.0.3.1 Darwin/21.6.0'
             }
+
             urllib3.disable_warnings()
-            r = requests.post(url=url, headers=headers, json=json_text, proxies=proxies, verify=False)
+            r = requests.post(url=url, headers=headers, data=payload_new, proxies=proxies, verify=False)
             if 'data' not in r.text or r.status_code != 200:
                 Common.logger(log_type, crawler).info(f"get_videoList:{r.text}\n")
                 Common.logging(log_type, crawler, env, f"get_videoList:{r.text}\n")
@@ -112,8 +89,8 @@ class XiaoniangaoAuthorScheduling:
                         publish_time_stamp = int(int(feeds[i].get("t", 0)) / 1000)
                         publish_time_str = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(publish_time_stamp))
                         # 用户名 / 头像
-                        user_name = feeds[i].get("album_user", {}).get("nick", "").strip().replace("\n", "") \
-                            .replace("/", "").replace("快手", "").replace(" ", "") \
+                        user_name = feeds[i].get("user", {}).get("nick", "").strip().replace("\n", "") \
+                            .replace("/", "").replace(" ", "") \
                             .replace(" ", "").replace("&NBSP", "").replace("\r", "")
 
                         video_dict = {
@@ -129,9 +106,9 @@ class XiaoniangaoAuthorScheduling:
                             "publish_time_str": publish_time_str,
                             "video_width": int(feeds[i].get("w", 0)),
                             "video_height": int(feeds[i].get("h", 0)),
-                            "avatar_url": feeds[i].get("album_user", {}).get("hurl", ""),
+                            "avatar_url": feeds[i].get("user", {}).get("hurl", ""),
                             "profile_id": feeds[i]["id"],
-                            "profile_mid": feeds[i]["mid"],
+                            "profile_mid": feeds[i].get("user", {}).get("mid", ""),
                             "cover_url": feeds[i].get("url", ""),
                             "video_url": feeds[i].get("v_url", ""),
                             "session": f"xiaoniangao-author-{int(time.time())}"

+ 24 - 0
xiaoniangao/xiaoniangao_main/run_xng_author_dev.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/6/19
+import os
+import sys
+sys.path.append(os.getcwd())
+from common.common import Common
+from xiaoniangao.xiaoniangao_author.xiaoniangao_author_scheduling import XiaoniangaoAuthorScheduling
+
+
+def main(log_type, crawler, env):  # dev runner: calls get_author_videos once with hard-coded user_list and rule_dict
+    Common.logger(log_type, crawler).info(f'开始抓取:小年糕账号\n')  # start message ("start crawling: Xiaoniangao accounts")
+    Common.logging(log_type, crawler, env, "开始抓取:小年糕账号\n")  # same start message sent through Common.logging, which also receives env
+    XiaoniangaoAuthorScheduling.get_author_videos(log_type=log_type,
+                                                  crawler=crawler,
+                                                  user_list=[{"uid": 6267140, "source": "xiaoniangao", "link": "108137355", "nick_name": "小年糕账号测试账号", "avatar_url": "http://rescdn.yishihui.com/user/default/avatar/live/1616555588736_u=2429663738,2803743392&fm=26&gp=0.jpg", "mode": "author"}],  # a single hard-coded user entry
+                                                  rule_dict={"play_cnt":{"min":500,"max":0},"period":{"min":3,"max":3},"duration":{"min":40,"max":0}},  # hard-coded rules, passed through unchanged
+                                                  env=env)
+    Common.logger(log_type, crawler).info("抓取一轮结束\n")  # end message ("one crawl round finished")
+    Common.logging(log_type, crawler, env, "抓取一轮结束\n")  # same end message sent through Common.logging
+
+
+if __name__ == "__main__":  # only runs when the file is executed as a script
+    main("author", "xiaoniangao", "dev")  # log_type="author", crawler="xiaoniangao", env="dev"