wangkun · 1 year ago
commit 465aba1858

+ 1 - 1
gongzhonghao/gongzhonghao_author/gongzhonghao2_author.py

@@ -458,7 +458,7 @@ class GongzhonghaoAuthor2:
                                           env=env)
                 Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
                 try:
-                    Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
+                    # Common.logger(log_type, crawler).info(f'获取 {user_dict["user_name"]} 公众号视频\n')
                     cls.get_videoList(log_type=log_type,
                                       crawler=crawler,
                                       rule_dict=rule_dict,

+ 20 - 161
kuaishou/kuaishou_author/kuaishou_author.py

@@ -14,7 +14,7 @@ sys.path.append(os.getcwd())
 from common.common import Common
 from common.feishu import Feishu
 from common.getuser import getUser
-from common.db import MysqlHelper
+from common.scheduling_db import MysqlHelper
 from common.publish import Publish
 from common.public import random_title, get_config_from_mysql
 from common.public import get_user_from_mysql
@@ -23,154 +23,6 @@ from common.public import get_user_from_mysql
 class KuaishouauthorScheduling:
     platform = "快手"
 
-    # 获取站外用户信息
-    @classmethod
-    def get_out_user_info(cls, log_type, crawler, out_uid):
-        try:
-            url = "https://www.kuaishou.com/graphql"
-
-            payload = json.dumps({
-                "operationName": "visionProfile",
-                "variables": {
-                    "userId": out_uid
-                },
-                "query": "query visionProfile($userId: String) {\n  visionProfile(userId: $userId) {\n    result\n    hostName\n    userProfile {\n      ownerCount {\n        fan\n        photo\n        follow\n        photo_public\n        __typename\n      }\n      profile {\n        gender\n        user_name\n        user_id\n        headurl\n        user_text\n        user_profile_bg_url\n        __typename\n      }\n      isFollowing\n      __typename\n    }\n    __typename\n  }\n}\n"
-            })
-            # s = string.ascii_lowercase
-            # r = random.choice(s)
-            headers = {
-                'Accept': '*/*',
-                'Content-Type': 'application/json',
-                'Origin': 'https://www.kuaishou.com',
-                'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
-                'Content-Length': '552',
-                'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
-                'Host': 'www.kuaishou.com',
-                'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.6.1 Safari/605.1.15',
-                'Referer': 'https://www.kuaishou.com/profile/{}'.format(out_uid),
-                'Accept-Encoding': 'gzip, deflate, br',
-                'Connection': 'keep-alive'
-            }
-            urllib3.disable_warnings()
-            s = requests.session()
-            # max_retries=3 重试3次
-            s.mount('http://', HTTPAdapter(max_retries=3))
-            s.mount('https://', HTTPAdapter(max_retries=3))
-            response = s.post(url=url, headers=headers, data=payload, proxies=Common.tunnel_proxies(), verify=False,
-                              timeout=5)
-            response.close()
-            # Common.logger(log_type, crawler).info(f"get_out_user_info_response:{response.text}")
-            if response.status_code != 200:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.text}\n")
-                return
-            elif 'data' not in response.json():
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()}\n")
-                return
-            elif 'visionProfile' not in response.json()['data']:
-                Common.logger(log_type, crawler).warning(f"get_out_user_info_response:{response.json()['data']}\n")
-                return
-            elif 'userProfile' not in response.json()['data']['visionProfile']:
-                Common.logger(log_type, crawler).warning(
-                    f"get_out_user_info_response:{response.json()['data']['visionProfile']['userProfile']}\n")
-                return
-            else:
-                userProfile = response.json()['data']['visionProfile']['userProfile']
-                # Common.logger(log_type, crawler).info(f"userProfile:{userProfile}")
-
-                try:
-                    out_fans_str = str(userProfile['ownerCount']['fan'])
-                except Exception:
-                    out_fans_str = "0"
-
-                try:
-                    out_follow_str = str(userProfile['ownerCount']['follow'])
-                except Exception:
-                    out_follow_str = "0"
-
-                try:
-                    out_avatar_url = userProfile['profile']['headurl']
-                except Exception:
-                    out_avatar_url = ""
-
-                Common.logger(log_type, crawler).info(f"out_fans_str:{out_fans_str}")
-                Common.logger(log_type, crawler).info(f"out_follow_str:{out_follow_str}")
-                Common.logger(log_type, crawler).info(f"out_avatar_url:{out_avatar_url}")
-
-                if "万" in out_fans_str:
-                    out_fans = int(float(out_fans_str.split("万")[0]) * 10000)
-                else:
-                    out_fans = int(out_fans_str.replace(",", ""))
-                if "万" in out_follow_str:
-                    out_follow = int(float(out_follow_str.split("万")[0]) * 10000)
-                else:
-                    out_follow = int(out_follow_str.replace(",", ""))
-
-                out_user_dict = {
-                    "out_fans": out_fans,
-                    "out_follow": out_follow,
-                    "out_avatar_url": out_avatar_url
-                }
-                Common.logger(log_type, crawler).info(f"out_user_dict:{out_user_dict}")
-                return out_user_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"get_out_user_info:{e}\n")
-
-    # 获取用户信息列表
-    @classmethod
-    def get_user_list(cls, log_type, crawler, sheetid, env, machine):
-        try:
-            while True:
-                user_sheet = Feishu.get_values_batch(log_type, crawler, sheetid)
-                if user_sheet is None:
-                    Common.logger(log_type, crawler).warning(f"user_sheet:{user_sheet} 10秒钟后重试")
-                    continue
-                our_user_list = []
-                for i in range(1, len(user_sheet)):
-                    # for i in range(1, 2):
-                    out_uid = user_sheet[i][2]
-                    user_name = user_sheet[i][3]
-                    our_uid = user_sheet[i][6]
-                    our_user_link = user_sheet[i][7]
-                    if out_uid is None or user_name is None:
-                        Common.logger(log_type, crawler).info("空行\n")
-                    else:
-                        Common.logger(log_type, crawler).info(f"正在更新 {user_name} 用户信息\n")
-                        if our_uid is None:
-                            out_user_info = cls.get_out_user_info(log_type, crawler, out_uid)
-                            out_user_dict = {
-                                "out_uid": out_uid,
-                                "user_name": user_name,
-                                "out_avatar_url": out_user_info["out_avatar_url"],
-                                "out_create_time": '',
-                                "out_tag": '',
-                                "out_play_cnt": 0,
-                                "out_fans": out_user_info["out_fans"],
-                                "out_follow": out_user_info["out_follow"],
-                                "out_friend": 0,
-                                "out_like": 0,
-                                "platform": cls.platform,
-                                "tag": cls.tag,
-                            }
-                            our_user_dict = getUser.create_user(log_type=log_type, crawler=crawler,
-                                                                out_user_dict=out_user_dict, env=env, machine=machine)
-                            our_uid = our_user_dict['our_uid']
-                            our_user_link = our_user_dict['our_user_link']
-                            Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}',
-                                                 [[our_uid, our_user_link]])
-                            Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!\n')
-                            our_user_list.append(our_user_dict)
-                        else:
-                            our_user_dict = {
-                                'out_uid': out_uid,
-                                'user_name': user_name,
-                                'our_uid': our_uid,
-                                'our_user_link': our_user_link,
-                            }
-                            our_user_list.append(our_user_dict)
-                return our_user_list
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f'get_user_list:{e}\n')
-
     # 处理视频标题
     @classmethod
     def video_title(cls, log_type, crawler, env, title):
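
The removed get_out_user_info normalized follower counts such as "1.5万" before storing them. For reference, a minimal sketch of that conversion; the helper name cn_count_to_int is illustrative and not part of the repo:

def cn_count_to_int(count_str: str) -> int:
    """Convert Kuaishou-style counts such as "1.5万" or "3,456" to plain integers."""
    if "万" in count_str:
        # "1.5万" -> 1.5 * 10000 -> 15000
        return int(float(count_str.split("万")[0]) * 10000)
    return int(count_str.replace(",", ""))

assert cn_count_to_int("1.5万") == 15000
assert cn_count_to_int("3,456") == 3456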
@@ -205,17 +57,24 @@ class KuaishouauthorScheduling:
             return video_title
 
     @classmethod
-    def get_did(cls, log_type, crawler):
-        while True:
-            did_sheet = Feishu.get_values_batch(log_type, crawler, "G7acT6")
-            if did_sheet is None:
-                Common.logger(log_type, crawler).warning(f"did_sheet:{did_sheet}")
-                time.sleep(2)
-                continue
-            return did_sheet[0][1]
+    def get_cookie(cls, log_type, crawler, env):
+        select_sql = f""" select * from crawler_config where source="{crawler}" """
+        configs = MysqlHelper.get_values(log_type, crawler, select_sql, env, action="")
+        for config in configs:
+            if "cookie" in config["config"]:
+                cookie_dict = {
+                    "cookie_id": config["id"],
+                    "title": config["title"].strip(),
+                    "cookie": dict(eval(config["config"]))["cookie"].strip(),
+                    "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(config["update_time"] / 1000))),
+                    "operator": config["operator"].strip()
+                }
+                for k, v in cookie_dict.items():
+                    print(f"{k}:{type(v)}, {v}")
+                return cookie_dict
 
     @classmethod
-    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine, pcursor=""):
+    def get_videoList(cls, log_type, crawler, strategy, our_uid, out_uid, oss_endpoint, env, machine):
         download_cnt_1, download_cnt_2 = 0, 0
         rule_dict_1 = cls.get_rule(log_type, crawler, 1)
         rule_dict_2 = cls.get_rule(log_type, crawler, 2)
@@ -237,7 +96,7 @@ class KuaishouauthorScheduling:
             'Accept': '*/*',
             'Content-Type': 'application/json',
             'Origin': 'https://www.kuaishou.com',
-            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_did(log_type, crawler)}; kpn=KUAISHOU_VISION',
+            'Cookie': f'kpf=PC_WEB; clientid=3; did={cls.get_(log_type, crawler)}; kpn=KUAISHOU_VISION',
             'Content-Length': '1260',
             'Accept-Language': 'zh-CN,zh-Hans;q=0.9',
             'Host': 'www.kuaishou.com',
@@ -256,7 +115,7 @@ class KuaishouauthorScheduling:
         if not feeds:
             Common.logger(log_type, crawler).info("没有更多视频啦 ~\n")
             return
-        pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
+        # pcursor = response.json()['data']['visionProfilePhotoList']['pcursor']
         # Common.logger(log_type, crawler).info(f"feeds0: {feeds}\n")
         for i in range(len(feeds)):
             try:
@@ -584,5 +443,5 @@ class KuaishouauthorScheduling:
 
 
 if __name__ == "__main__":
-    print(KuaishouauthorScheduling.get_did("follow", "kuaishou"))
+    KuaishouauthorScheduling.get_cookie("author", "kuaishou", "dev")
     pass
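
The new get_cookie replaces the Feishu did lookup: it reads the account cookie from the crawler_config table, keyed by source. Below is a minimal, self-contained sketch of the row parsing it performs; the sample row, its values, and the helper name parse_cookie_config are illustrative assumptions, while the real rows come from MysqlHelper.get_values:

import time

# Hypothetical crawler_config row; in the real code this comes from
# MysqlHelper.get_values(log_type, crawler, select_sql, env, action="").
sample_config = {
    "id": 1,
    "title": "kuaishou author cookie",
    "config": '{"cookie": "kpf=PC_WEB; clientid=3; did=web_xxx; kpn=KUAISHOU_VISION"}',
    "update_time": 1689000000000,  # milliseconds since epoch
    "operator": "wangkun",
}

def parse_cookie_config(config):
    """Mirror the row parsing done in KuaishouauthorScheduling.get_cookie."""
    if "cookie" not in config["config"]:
        return None
    return {
        "cookie_id": config["id"],
        "title": config["title"].strip(),
        # config["config"] is stored as a dict-shaped string, hence dict(eval(...))
        "cookie": dict(eval(config["config"]))["cookie"].strip(),
        "update_time": time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(config["update_time"] / 1000))),
        "operator": config["operator"].strip(),
    }

if __name__ == "__main__":
    print(parse_cookie_config(sample_config))

Since the config column is parsed with eval(), json.loads() would be the safer choice if the stored value is valid JSON; the sketch keeps eval() only to match the committed code.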

+ 1 - 1
shipinhao/shipinhao_main/run_shipinhao.sh

@@ -8,7 +8,7 @@ env=$4          # 爬虫运行环境,正式环境: prod / 测试环境: dev
 if [ ${env} = "--env=prod" ];then
   piaoquan_crawler_dir=/Users/piaoquan/Desktop/piaoquan_crawler/
   profile_path=/etc/profile
-  python=python3
+  python=/usr/bin/python3
   node_path=/usr/local/bin/node
   nohup_log_path=${piaoquan_crawler_dir}shipinhao/logs/$(date +%Y-%m-%d)-run-shell.log
 elif [ ${env} = "--env=dev" ];then