wangkun 2 years ago
parent
commit
7a636d7b26

BIN
.DS_Store


+ 1 - 0
README.MD

@@ -20,6 +20,7 @@ ${nohup_dir}:       nohup日志存储路径,如: ./youtube/nohup.log
 ```
 youtube定向榜运行命令: 
 sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="hk" --env="prod" --machine="aliyun_hk" youtube/nohup.log
+sh ./main/main.sh ./youtube/youtube_main/run_youtube_follow.py --log_type="follow" --crawler="youtube" --strategy="定向爬虫策略" --oss_endpoint="hk" --env="dev" --machine="aliyun_hk" youtube/nohup.log
 youtube定向榜杀进程命令: 
 ps aux | grep run_youtube | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep run_youtube | grep Python | grep -v grep | awk '{print $2}' | xargs kill -9

+ 5 - 3
common/common.py

@@ -72,7 +72,7 @@ class Common:
         else:
             for file in all_logs[:len(all_logs) - 10]:
                 os.remove(log_dir + file)
-        cls.logger(log_type, crawler).info("清除日志成功")
+        cls.logger(log_type, crawler).info("清除日志成功\n")
 
     # 删除 charles 缓存文件,只保留最近的两个文件
     @classmethod
@@ -81,7 +81,7 @@ class Common:
         all_file = sorted(os.listdir(f"./{crawler}/{crawler}_chlsfiles/"))
         for file in all_file[0:-3]:
             os.remove(f"./{crawler}/{crawler}_chlsfiles/{file}")
-        cls.logger(log_type, crawler).info("删除 charles 缓存文件成功")
+        cls.logger(log_type, crawler).info("删除 charles 缓存文件成功\n")
 
     # 保存视频信息至 "./videos/{video_dict['video_title}/info.txt"
     @classmethod
@@ -145,7 +145,9 @@ class Common:
             # 视频名
             video_name = "video.mp4"
             try:
-                download_cmd = f"yt-dlp -f 'bv[height=720][ext=mp4]+ba[ext=m4a]' --merge-output-format mp4 {video_url} -o {video_name}"
+                download_cmd = f'yt-dlp -f "bv[height=720][ext=mp4]+ba[ext=m4a]" --merge-output-format mp4 {video_url} -o {video_name}'
+                # e.g. 'yt-dlp -f "bv[height=720][ext=mp4]+ba[ext=m4a]" --merge-output-format mp4 https://www.youtube.com/watch?v=Q4MtXQY0aHM -o video.mp4'
+                # yt-dlp upgrades are handled by main.sh (`pip3 install yt-dlp -U`); fusing -U onto the URL corrupts the link
+                Common.logger(log_type, crawler).info(f"download_cmd:{download_cmd}")
                 os.system(download_cmd)
                 move_cmd = f"mv {video_name} {video_dir}"
                 os.system(move_cmd)

+ 3 - 1
common/publish.py

@@ -173,7 +173,7 @@ class Publish:
         if env == 'dev':
             uids_dev = [6267140, 6267141]
             return random.choice(uids_dev)
-        elif crawler == 'kanyikan' and env == 'prod' and strategy == 'kanyikan_moment':
+        elif crawler == 'kanyikan':
             uids_prod_kanyikan_moment = [20631208, 20631209, 20631210, 20631211, 20631212,
                                           20631213, 20631214, 20631215, 20631216, 20631217,
                                           20631223, 20631224, 20631225, 20631226, 20631227]
@@ -232,6 +232,8 @@ class Publish:
             return 'GONGZHONGHAO_XINXIN'
         elif crawler == 'weixinzhishu':
             return 'WEIXINZHISHU'
+        else:
+            return "CRAWLER"
 
     @classmethod
     def local_file_path(cls, crawler):

+ 10 - 4
kanyikan/kanyikan_main/run_kanyikan_moment.py

@@ -15,13 +15,13 @@ from common.feishu import Feishu
 from kanyikan.kanyikan_moment.kanyikan_moment import Moment
 
 
-def main(log_type, crawler, strategy, our_uid, env, oss_endpoint):
+def main(log_type, crawler, strategy, oss_endpoint, env, machine):
     """
     主函数入口
     :param log_type: 日志命名: moment
     :param crawler: 哪款爬虫: kanyikan
     :param strategy: 爬虫策略: kanyikan_moment
-    :param our_uid: 站内 UID: kanyikan_moment
+    :param machine: 爬虫运行机器,阿里云服务器: aliyun_hk / aliyun / macpro / macair / local
     :param env: 正式环境: prod;测试环境: dev
     :param oss_endpoint: 阿里云102服务器: inner ;其它: out
     :return: None
@@ -33,7 +33,7 @@ def main(log_type, crawler, strategy, our_uid, env, oss_endpoint):
             moment_video_list = Feishu.get_sheet_content(log_type, crawler, 'iK58HX')
             for moment_video_id in moment_video_list:
                 Common.logger(log_type, crawler).info(f"开始抓取{moment_video_id}朋友圈推荐视频\n")
-                Moment.get_videos(log_type, crawler, strategy, our_uid, env, oss_endpoint, moment_video_id)
+                Moment.get_videos(log_type, crawler, strategy, oss_endpoint, env, machine, moment_video_id)
 
             Common.del_logs(log_type, crawler)
             Common.logger(log_type, crawler).info("抓取完一轮,休眠 10 秒\n")
@@ -48,6 +48,12 @@ if __name__ == "__main__":
     parser.add_argument('--our_uid')  ## 添加参数
     parser.add_argument('--oss_endpoint')  ## 添加参数
     parser.add_argument('--env')  ## 添加参数
+    parser.add_argument('--machine')  ## 添加参数
     args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
     # print(args)
-    main(args.log_type, args.crawler, args.strategy, args.our_uid, args.env, args.oss_endpoint)
+    main(log_type=args.log_type,
+         crawler=args.crawler,
+         strategy=args.strategy,
+         oss_endpoint=args.oss_endpoint,
+         env=args.env,
+         machine=args.machine)

+ 9 - 4
kanyikan/kanyikan_moment/kanyikan_moment.py

@@ -37,7 +37,7 @@ class Moment:
 
     # 获取推荐视频列表
     @classmethod
-    def get_videos(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint, moment_video_id):
+    def get_videos(cls, log_type, crawler, strategy, oss_endpoint, env, machine, moment_video_id):
         url = "https://search.weixin.qq.com/cgi-bin/recwxa/snsgetvideoinfo?"
         headers = {
             "content-type": "application/json",
@@ -207,13 +207,13 @@ class Moment:
                     elif video_id in [j for m in Feishu.get_values_batch(log_type, crawler, "20ce0c") for j in m]:
                         Common.logger(log_type, crawler).info("视频已下载\n")
                     else:
-                        cls.download_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint, video_dict)
+                        cls.download_publish(log_type, crawler, strategy, oss_endpoint, env, video_dict)
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
 
     # 下载/上传视频
     @classmethod
-    def download_publish(cls, log_type, crawler, strategy, our_uid, env, oss_endpoint, video_dict):
+    def download_publish(cls, log_type, crawler, strategy, oss_endpoint, env, video_dict):
         try:
             # 过滤空行及空标题视频
             if video_dict['video_id'] == 0 \
@@ -241,7 +241,12 @@ class Moment:
 
                 # 上传视频
                 Common.logger(log_type, crawler).info(f"开始上传视频:{video_dict['video_title']}")
-                our_video_id = Publish.upload_and_publish(log_type, crawler, strategy, our_uid, env, oss_endpoint)
+                our_video_id = Publish.upload_and_publish(log_type=log_type,
+                                                          crawler=crawler,
+                                                          strategy=strategy,
+                                                          oss_endpoint=oss_endpoint,
+                                                          our_uid="kanyikan_moment",
+                                                          env=env)
                 if env == 'dev':
                     our_video_link = f"https://testadmin.piaoquantv.com/cms/post-detail/{our_video_id}/info"
                 else:

+ 2 - 1
main/main.sh

@@ -48,7 +48,8 @@ ps aux | grep ${grep_str} | grep Python | grep -v grep | awk '{print $2}' | xarg
 echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"
 
 if [ ${machine} = "--machine=aliyun_hk" ];then
-  echo "无需更新代码"
+  echo "升级yt-dlp"
+  pip3 install yt-dlp -U
 else
   echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
   cd ${piaoquan_crawler_dir} && git pull origin master --force && rm -f ${piaoquan_crawler_dir}main/nohup.log && rm -f ${piaoquan_crawler_dir}${nohup_dir}

+ 2 - 1
requirements.txt

@@ -8,4 +8,5 @@ requests==2.27.1
 selenium~=4.2.0
 urllib3==1.26.9
 emoji~=2.2.0
-Appium-Python-Client~=2.8.1
+Appium-Python-Client~=2.8.1
+atomac~=1.2.0

BIN
weixinzhishu/.DS_Store


BIN
weixinzhishu/logs/.DS_Store


+ 250 - 0
weixinzhishu/weixinzhishu_main/get_weixinzhishu.py

@@ -0,0 +1,250 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/10
+import os
+import sys
+import time
+from datetime import date, timedelta
+import requests
+import json
+sys.path.append(os.getcwd())
+from common.feishu import Feishu
+from common.common import Common
+
+
+class Weixinzhishu:
+    pageNum = 1
+
+    # 获取微信 key / openid
+    @classmethod
+    def get_wechat_key(cls, log_type, crawler):
+        """
+        获取微信 key / openid
+        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
+        :param log_type: 日志名
+        :param crawler: 哪款爬虫,填写:weixinzhishu
+        :return: search_key, openid
+        """
+        try:
+            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
+            # the latest search_key / openid live in the sheet's second row;
+            # the original for-loop returned on its first pass anyway
+            search_key = sheet[1][1]
+            openid = sheet[1][2]
+            return search_key, openid
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
+
+    # 获取热词
+    @classmethod
+    def get_word(cls, log_type, crawler, host):
+        try:
+            url = '/hot/word/getAllWords'
+            params = {
+                'pageNum': cls.pageNum,  # 第几页,默认1,int
+                'pageSize': 100  # 请求条目数,默认为100,int
+            }
+            response = requests.post(url=host+url, json=params)
+            cls.pageNum += 1
+            if response.status_code != 200:
+                Common.logger(log_type, crawler).warning(f"get_word_response:{response.text}\n")
+            elif response.json()['message'] != "success":
+                Common.logger(log_type, crawler).warning(f"get_word_response:{response.json()}\n")
+            else:
+                word_list = response.json()['data']['words']
+                return word_list
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"get_word:{e}\n")
+
+    # 获取热词分数
+    @classmethod
+    def get_word_score(cls, log_type, crawler, word_id, word):
+        """
+        获取热词分数
+        :param log_type: 日志名
+        :param crawler: 哪款爬虫,填写:weixinzhishu
+        :param word_id: 热词 ID
+        :param word: 热词
+        :return: 热词 7 天指数,例如:
+        {'id': 1,
+        'word': '消息',
+        'wechatScores': [
+        {'score': 95521022, 'scoreDate': '2023-02-07'},
+        {'score': 97315283, 'scoreDate': '2023-02-08'},
+        {'score': 109845849, 'scoreDate': '2023-02-09'},
+        {'score': 107089560, 'scoreDate': '2023-02-10'},
+        {'score': 102658391, 'scoreDate': '2023-02-11'},
+        {'score': 93843701, 'scoreDate': '2023-02-12'},
+        {'score': 100211894, 'scoreDate': '2023-02-13'}]}
+        """
+        try:
+            while True:
+                wechat_key = cls.get_wechat_key(log_type, crawler)
+                search_key = wechat_key[0]
+                openid = wechat_key[-1]
+                start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
+                end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
+                url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
+                payload = json.dumps({
+                    "openid": openid,
+                    "search_key": search_key,
+                    "cgi_name": "GetDefaultIndex",
+                    "start_ymd": start_ymd,
+                    "end_ymd": end_ymd,
+                    "query": word
+                })
+                headers = {
+                    'Host': 'search.weixin.qq.com',
+                    'content-type': 'application/json',
+                    'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
+                    'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
+                }
+                response = requests.request("POST", url=url, headers=headers, data=payload)
+                wechat_score_list = []
+                word_wechat_score_dict = {
+                    "id": word_id,
+                    "word": word,
+                    "wechatScores": wechat_score_list,
+                }
+                if response.json()['code'] == -10000:
+                    Common.logger(log_type, crawler).warning(f"response:{response.json()['msg']} 休眠 10 秒,重新获取")
+                    time.sleep(10)
+                elif response.json()['code'] == -10002:
+                    Common.logger(log_type, crawler).info(f'{word}:该词暂未收录')
+                    # # 数据写入飞书
+                    # now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+                    # values = [[now, word, "该词暂未收录"]]
+                    # Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
+                    # time.sleep(0.5)
+                    # Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
+                    # Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
+                    return word_wechat_score_dict
+                elif response.json()['code'] != 0:
+                    Common.logger(log_type, crawler).info(f'response:{response.text}\n')
+                    return word_wechat_score_dict
+                else:
+                    time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
+                    for i in range(len(time_index)):
+                        score_time = time_index[i]['time']
+                        score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
+                        score = time_index[i]['score']
+                        wechat_score_dict = {"score": score, "scoreDate": score_time_str}
+                        wechat_score_list.append(wechat_score_dict)
+                        # # 数据写入飞书
+                        # now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+                        # values = [[now, word, score_time_str, score]]
+                        # Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
+                        # time.sleep(0.5)
+                        # Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
+                        # Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
+                    return word_wechat_score_dict
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"get_word_score异常:{e}\n")
+
+    # 获取微信指数
+    @classmethod
+    def get_wechat_score(cls, log_type, crawler, host):
+        """
+        获取微信指数
+        :param log_type: 日志名
+        :param crawler: 哪款爬虫
+        :param host: 域名
+        :return: 热词指数列表
+        """
+        while True:
+            word_list = cls.get_word(log_type, crawler, host)
+            if word_list is None or len(word_list) == 0:  # get_word returns None on error
+                Common.logger(log_type, crawler).info(f"热词更新完毕\n")
+                cls.pageNum = 1
+                return
+            else:
+                wechat_score_data = []
+                Common.logger(log_type, crawler).info(f"len(word_list):{len(word_list)}")
+                for i in range(len(word_list)):
+                    word_id = word_list[i]['id']
+                    word = word_list[i]['word']
+                    Common.logger(log_type, crawler).info(f"word_id:{word_id}")
+                    Common.logger(log_type, crawler).info(f"word:{word}")
+                    word_score_dict = cls.get_word_score(log_type, crawler, word_id, word)
+                    Common.logger(log_type, crawler).info(f"word_score_dict:{word_score_dict}\n")
+                    wechat_score_data.append(word_score_dict)
+                Common.logger(log_type, crawler).info(f"wechat_score_data:{wechat_score_data}\n")
+                cls.update_wechat_score(log_type, crawler, wechat_score_data, host)
+
+    # 更新微信指数
+    @classmethod
+    def update_wechat_score(cls, log_type, crawler, data, host):
+        """
+        更新热词微信指数
+        :param log_type: 日志名
+        :param crawler: 哪款爬虫
+        :param data: 热词微信指数
+        :param host: 域名
+        :return: {"code":200, "message":"success"}
+        """
+        try:
+            url = '/hot/word/updateWechatScore'
+            params = {'data': data}
+            response = requests.post(url=host+url, json=params)
+            if response.status_code != 200:
+                Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.text}\n")
+            elif response.json()["message"] != "success":
+                Common.logger(log_type, crawler).warning(f"update_wechat_score_response:{response.json()}\n")
+            else:
+                Common.logger(log_type, crawler).info(f"更新热词微信指数:{response.json()['message']}\n")
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"update_wechat_score:{e}\n")
+
+    @classmethod
+    def get_score_test(cls, log_type, crawler, word_id, word):
+        wechat_key = cls.get_wechat_key(log_type, crawler)
+        search_key = wechat_key[0]
+        openid = wechat_key[-1]
+        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
+        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
+        url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
+        payload = json.dumps({
+            "openid": openid,
+            "search_key": search_key,
+            "cgi_name": "GetDefaultIndex",
+            "start_ymd": start_ymd,
+            "end_ymd": end_ymd,
+            "query": word
+        })
+        headers = {
+            'Host': 'search.weixin.qq.com',
+            'content-type': 'application/json',
+            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
+            'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
+        }
+        response = requests.request("POST", url, headers=headers, data=payload)
+        wechat_score_list = []
+        word_wechat_score_dict = {
+            "id": word_id,
+            "word": word,
+            "wechatScores": wechat_score_list,
+        }
+        if response.json()['code'] == -10000:
+            print(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取")
+            time.sleep(10)
+            cls.get_score_test(log_type, crawler, word_id, word)
+        elif response.json()['code'] == -10002:
+            print("该词暂未收录")
+            print(f"{word_wechat_score_dict}")
+        elif response.json()['code'] != 0:
+            print(f"{word_wechat_score_dict}")
+        else:
+            time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
+            for i in range(len(time_index)):
+                score_time = time_index[i]['time']
+                score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
+                score = time_index[i]['score']
+                wechat_score_dict = {"score": score, "scoreDate": score_time_str}
+                wechat_score_list.append(wechat_score_dict)
+                print(f"wechat_score_dict:{wechat_score_dict}")
+            print(word_wechat_score_dict)
+
+
+if __name__ == "__main__":
+    Weixinzhishu.get_score_test('weixin', 'weixinzhishu', 1, "社保")
+
+    pass

+ 10 - 6
weixinzhishu/weixinzhishu_main/run_weixinzhishu.py

@@ -1,19 +1,24 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2023/2/13
-import argparse
+# import argparse
 import os
 import sys
 sys.path.append(os.getcwd())
 from common.common import Common
-from weixinzhishu.weixinzhishu_main.weixinzhishu import Weixinzhishu
+from weixinzhishu.weixinzhishu_main.get_weixinzhishu import Weixinzhishu
 
 
 class Main:
     @classmethod
-    def main(cls, log_type, crawler):
+    def main(cls, log_type, crawler, env):
+        if env == "dev":
+            host = 'http://testhot-words-internal.piaoquantv.com'
+        else:
+            host = 'http://hot-words-internal.piaoquantv.com'
         Common.logger(log_type, crawler).info("开始抓取微信指数\n")
-        Weixinzhishu.update_wechat_score(log_type, crawler)
+        Weixinzhishu.get_wechat_score(log_type, crawler, host)
+        Common.del_logs(log_type, crawler)
 
 
 if __name__ == "__main__":
@@ -28,5 +33,4 @@ if __name__ == "__main__":
     # args = parser.parse_args()  ### 参数赋值,也可以通过终端赋值
     # # print(args)
     # Main.main(log_type=args.log_type, crawler=args.crawler)
-
-    Main.main("weixin", "weixinzhishu")
+    Main.main("weixin", "weixinzhishu", "prod")

+ 4 - 3
weixinzhishu/weixinzhishu_main/search_key.py

@@ -2,6 +2,7 @@
 # @Author: wangkun
 # @Time: 2023/2/10
 """
+部署机器: Windows 笔记本
 获取微信指数小程序请求参数:search_key
     1. 启动 WinAppDriver.exe
     2. 启动 Charles.exe:
@@ -10,7 +11,7 @@
     3. 启动 Python 脚本:
         3.1 cd D:\piaoquan_crawler
         3.2 python .\weixinzhishu\weixinzhishu_main\search_key.py
-每分钟获取最新search_key,写入飞书: https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
+每 10 秒获取最新search_key,写入飞书: https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
 """
 import json
 import os
@@ -181,7 +182,7 @@ class Searchkey:
 if __name__ == '__main__':
     while True:
         Searchkey.write_search_key_to_feishu('searchkey', 'weixinzhishu')
-        Common.logger('searchkey', 'weixinzhishu').info('休眠 1 分钟')
-        time.sleep(60)
+        Common.logger('searchkey', 'weixinzhishu').info('休眠 10 秒')
+        time.sleep(10)
 
     # Searchkey.start_wechat('searchkey', 'weixinzhishu')

+ 26 - 0
weixinzhishu/weixinzhishu_main/search_key_mac.py

@@ -0,0 +1,26 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/20
+import time
+
+import atomac
+
+
+class SearchKey:
+    @classmethod
+    def start_wechat(cls):
+        bundle_id = "com.tencent.xinWeChat"
+        atomac.launchAppByBundleId(bundle_id)
+        automator = atomac.getAppRefByBundleId(bundle_id)
+        time.sleep(3)
+
+        window = automator.windows()[0]
+
+        msg_box = window.findFirstR(AXRole="AXCell", AXIdentifier="MMChatsTableCellView_0")
+        print(msg_box.getAttributes())
+
+
+if __name__ == "__main__":
+    SearchKey.start_wechat()

+ 0 - 277
weixinzhishu/weixinzhishu_main/weixinzhishu.py

@@ -1,277 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2023/2/10
-import os
-import sys
-import time
-from datetime import date, timedelta
-import requests
-import json
-sys.path.append(os.getcwd())
-from common.feishu import Feishu
-from common.common import Common
-
-
-class Weixinzhishu:
-    pageNum = 1
-
-    # 获取微信 key / openid
-    @classmethod
-    def get_wechat_key(cls, log_type, crawler):
-        """
-        获取微信 key / openid
-        https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?sheet=sVL74k
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫,填写:weixinzhishu
-        :return: search_key, openid
-        """
-        try:
-            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
-            for i in range(len(sheet)):
-                search_key = sheet[1][1]
-                openid = sheet[1][2]
-                return search_key, openid
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
-
-    # 获取热词
-    @classmethod
-    def get_word(cls):
-        url = '/hot/word/getAllWords'
-        params = {
-            'pageNum': cls.pageNum,  # 第几页,默认1,int
-            'pageSize': 100  # 请求条目数,默认为100,int
-        }
-        response = requests.post(url=url, json=params)
-        cls.pageNum += 1
-        word_list = []
-        print(response.text)
-        return word_list
-
-    # 获取热词分数
-    @classmethod
-    def get_word_score(cls, log_type, crawler, word_id, word):
-        """
-        获取热词分数
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫,填写:weixinzhishu
-        :param word_id: 热词 ID
-        :param word: 热词
-        :return: 热词 7 天指数,例如:
-        {'id': 1,
-        'word': '消息',
-        'wechatScores': [
-        {'score': 95521022, 'scoreDate': '2023-02-07'},
-        {'score': 97315283, 'scoreDate': '2023-02-08'},
-        {'score': 109845849, 'scoreDate': '2023-02-09'},
-        {'score': 107089560, 'scoreDate': '2023-02-10'},
-        {'score': 102658391, 'scoreDate': '2023-02-11'},
-        {'score': 93843701, 'scoreDate': '2023-02-12'},
-        {'score': 100211894, 'scoreDate': '2023-02-13'}]}
-        """
-        try:
-            wechat_key = cls.get_wechat_key(log_type, crawler)
-            search_key = wechat_key[0]
-            openid = wechat_key[-1]
-            end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-            start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
-            url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
-            payload = json.dumps({
-                "openid": openid,
-                "search_key": search_key,
-                "cgi_name": "GetDefaultIndex",
-                "start_ymd": start_ymd,
-                "end_ymd": end_ymd,
-                "query": word
-            })
-            headers = {
-                'Host': 'search.weixin.qq.com',
-                'content-type': 'application/json',
-                'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
-                'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
-            }
-            response = requests.request("POST", url, headers=headers, data=payload)
-            wechat_score_list = []
-            word_wechat_score_dict = {
-                "id": word_id,
-                "word": word,
-                "wechatScores": wechat_score_list,
-            }
-            if response.json()['code'] == -10000:
-                # Common.logger(log_type, crawler).warning(f"response:{response.json()['msg']} 休眠 10 秒,重新获取\n")
-                # time.sleep(10)
-                # cls.get_word_score(log_type, crawler, word_id, word)
-                return None
-            elif response.json()['code'] == -10002:
-                # Common.logger(log_type, crawler).info(f'{word}:该词暂未收录')
-                # # 数据写入飞书
-                # now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
-                # values = [[now, word, "该词暂未收录"]]
-                # Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
-                # time.sleep(0.5)
-                # Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
-                # Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
-                return word_wechat_score_dict
-            elif response.json()['code'] != 0:
-                Common.logger(log_type, crawler).info(f'response:{response.text}\n')
-                return word_wechat_score_dict
-            else:
-                time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-                for i in range(len(time_index)):
-                    score_time = time_index[i]['time']
-                    score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
-                    score = time_index[i]['score']
-                    wechat_score_dict = {"score": score, "scoreDate": score_time_str}
-                    wechat_score_list.append(wechat_score_dict)
-
-                    # # 数据写入飞书
-                    # now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
-                    # values = [[now, word, score_time_str, score]]
-                    # Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
-                    # time.sleep(0.5)
-                    # Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
-                    # Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
-
-                return word_wechat_score_dict
-        except Exception as e:
-            Common.logger(log_type, crawler).error(f"weixinzhishu异常:{e}\n")
-
-    # 获取微信指数
-    @classmethod
-    def get_wechat_score(cls, log_type, crawler):
-        """
-        获取微信指数
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫
-        :return: 热词指数列表
-        """
-        while True:
-            word_list = cls.get_word()
-            if len(word_list) == 0:
-                Common.logger(log_type, crawler).info(f"热词更新完毕\n")
-                cls.pageNum = 1
-                return []
-            else:
-                wechat_score_data = []
-                for i in range(len(word_list)):
-                    word_id = word_list[i]['Id']
-                    word = word_list[i]['word']
-                    word_score_dict = cls.get_word_score(log_type, crawler, word_id, word)
-                    wechat_score_data.append(word_score_dict)
-                    return wechat_score_data
-
-    # 更新微信指数
-    @classmethod
-    def update_wechat_score(cls, log_type, crawler):
-        """
-        更新热词微信指数
-        :param log_type: 日志名
-        :param crawler: 哪款爬虫
-        :return: {"code":200, "message":"success"}
-        """
-        data = {
-            'data': cls.get_wechat_score(log_type, crawler)
-        }
-
-        url = '/hot/word/updateWechatScore'
-        params = {
-            'data': data
-        }
-        response = requests.post(url=url, json=params)
-        print(response.text)
-
-    @classmethod
-    def update_wechat_score_test(cls, log_type, crawler):
-        our_word_list = []
-        out_word_list = []
-        our_word_sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', "nCudsM")
-        out_word_sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', "D80uEf")
-        for x in our_word_sheet:
-            for y in x:
-                if y is None:
-                    pass
-                else:
-                    our_word_list.append(y)
-        for x in out_word_sheet:
-            for y in x:
-                if y is None:
-                    pass
-                else:
-                    out_word_list.append(y)
-        word_list = our_word_list+out_word_list
-        word_score_list = []
-        # for i in range(len(word_list)):
-        for i in range(100):
-            while True:
-                Common.logger(log_type, crawler).info(f"word_id:{i + 1}, word:{word_list[i]}")
-                word_score = cls.get_word_score(log_type, crawler, int(i + 1), word_list[i])
-                if word_score is None:
-                    Common.logger(log_type, crawler).info("微信key过期,10秒钟后重试")
-                    time.sleep(10)
-                else:
-                    word_score_list.append(word_score)
-                    Common.logger(log_type, crawler).info(f'微信指数:{word_score}\n')
-                    break
-
-        word_dict = {
-            "data": word_score_list
-        }
-        return word_dict
-
-    @classmethod
-    def get_score_test(cls, log_type, crawler, word_id, word):
-        wechat_key = cls.get_wechat_key(log_type, crawler)
-        search_key = wechat_key[0]
-        openid = wechat_key[-1]
-        end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
-        start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
-        url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
-        payload = json.dumps({
-            "openid": openid,
-            "search_key": search_key,
-            "cgi_name": "GetDefaultIndex",
-            "start_ymd": start_ymd,
-            "end_ymd": end_ymd,
-            "query": word
-        })
-        headers = {
-            'Host': 'search.weixin.qq.com',
-            'content-type': 'application/json',
-            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_3_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.32(0x1800202a) NetType/WIFI Language/zh_CN',
-            'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
-        }
-        response = requests.request("POST", url, headers=headers, data=payload)
-        wechat_score_list = []
-        word_wechat_score_dict = {
-            "id": word_id,
-            "word": word,
-            "wechatScores": wechat_score_list,
-        }
-        if response.json()['code'] == -10000:
-            print(f"{time.strftime('%Y-%m-%d %H:%M:%S', time.localtime(int(time.time())))} response:{response.json()['msg']} 休眠 10 秒,重新获取\n")
-            time.sleep(10)
-            cls.get_score_test(log_type, crawler, word_id, word)
-        elif response.json()['code'] == -10002:
-            print("该词暂未收录")
-            print(f"{word_wechat_score_dict}")
-        elif response.json()['code'] != 0:
-            print(f"{word_wechat_score_dict}")
-        else:
-            time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-            for i in range(len(time_index)):
-                score_time = time_index[i]['time']
-                score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
-                score = time_index[i]['score']
-                wechat_score_dict = {"score": score, "scoreDate": score_time_str}
-                wechat_score_list.append(wechat_score_dict)
-                print(f"wechat_score_dict:{wechat_score_dict}")
-            print(word_wechat_score_dict)
-
-
-if __name__ == "__main__":
-    Weixinzhishu.get_score_test('weixin', 'weixinzhishu', 1 , "春晚")
-    #
-    # word_dict_demo = Weixinzhishu.update_wechat_score_test('weixin', 'weixinzhishu')
-    # print(word_dict_demo)
-
-    pass

+ 3 - 0
xigua/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17

+ 3 - 0
xigua/xigua_follow/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17

+ 3 - 0
xigua/xigua_follow/xigua_demo.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17

+ 431 - 0
xigua/xigua_follow/xigua_follow.py

@@ -0,0 +1,431 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17
+import base64
+import json
+import os
+import sys
+import time
+
+import requests
+import urllib3
+from selenium.webdriver import DesiredCapabilities
+from selenium.webdriver.chrome.service import Service
+from selenium.webdriver.common.by import By
+from seleniumwire import webdriver
+
+sys.path.append(os.getcwd())
+from common.common import Common
+from common.feishu import Feishu
+from common.publish import Publish
+proxies = {"http": None, "https": None}
+
+
+class Follow:
+    # 个人主页视频翻页参数
+    offset = 0
+
+    # 下载规则
+    @staticmethod
+    def download_rule(duration, width, height):
+        if int(duration) >= 60:
+            if int(width) >= 720 or int(height) >= 720:
+                return True
+            else:
+                return False
+        else:
+            return False
+
+    # 过滤词库
+    @classmethod
+    def filter_words(cls, log_type, crawler):
+        try:
+            filter_words_sheet = Feishu.get_values_batch(log_type, crawler, 'KGB4Hc')
+            filter_words_list = []
+            for x in filter_words_sheet:
+                for y in x:
+                    if y is None:
+                        pass
+                    else:
+                        filter_words_list.append(y)
+            return filter_words_list
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'filter_words异常:{e}\n')
+
+    # 获取用户信息(字典格式). 注意:部分 user_id 字符类型是 int / str
+    @classmethod
+    def get_user_info_from_feishu(cls, log_type, crawler):
+        try:
+            user_sheet = Feishu.get_values_batch(log_type, crawler, '5tlTYB')
+            user_dict = {}
+            for i in range(1, len(user_sheet)):
+                user_name = user_sheet[i][0]
+                out_id = user_sheet[i][1]
+                our_id = user_sheet[i][3]
+                if user_name is None or out_id is None or our_id is None:
+                    pass
+                else:
+                    user_dict[user_name] = str(out_id) + ',' + str(our_id)
+            return user_dict
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_user_id_from_feishu异常:{e}\n')
+
+    @classmethod
+    def get_signature(cls, log_type, crawler, out_uid, machine):
+        try:
+            # 打印请求配置
+            ca = DesiredCapabilities.CHROME
+            ca["goog:loggingPrefs"] = {"performance": "ALL"}
+
+            # 不打开浏览器运行
+            chrome_options = webdriver.ChromeOptions()
+            chrome_options.add_argument("--headless")
+            chrome_options.add_argument('--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/79.0.3945.79 Safari/537.36')
+            chrome_options.add_argument("--no-sandbox")
+
+            # driver初始化
+            if machine == 'aliyun' or machine == 'aliyun_hk':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options)
+            elif machine == 'macpro':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+                                          service=Service('/Users/lieyunye/Downloads/chromedriver_v86/chromedriver'))
+            elif machine == 'macair':
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options,
+                                          service=Service('/Users/piaoquan/Downloads/chromedriver'))
+            else:
+                driver = webdriver.Chrome(desired_capabilities=ca, options=chrome_options, service=Service('/Users/wangkun/Downloads/chromedriver/chromedriver_v110/chromedriver'))
+            driver.implicitly_wait(10)
+            driver.get(f'https://www.ixigua.com/home/{out_uid}/')
+            time.sleep(3)
+            data_src = driver.find_elements(By.XPATH, '//img[@class="tt-img BU-MagicImage tt-img-loaded"]')[1].get_attribute("data-src")
+            signature = data_src.split("x-signature=")[-1]
+            # print(f"data_src:{data_src}")
+            # print(f"signature:{signature}")
+            return signature
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f'get_signature异常:{e}\n')
+
+    # 获取视频详情
+    @classmethod
+    def get_video_url(cls, log_type, crawler, gid):
+        # try:
+        url = 'https://www.ixigua.com/api/mixVideo/information?'
+        headers = {
+            "accept-encoding": "gzip, deflate",
+            "accept-language": "zh-CN,zh-Hans;q=0.9",
+            "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "
+                          "AppleWebKit/605.1.15 (KHTML, like Gecko) Version/15.5 Safari/605.1.15",
+            "referer": "https://www.ixigua.com/7102614741050196520?logTag=0531c88ac04f38ab2c62",
+        }
+        params = {
+            'mixId': gid,
+            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfC'
+                       'NVVIOBNjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
+            'X-Bogus': 'DFSzswVupYTANCJOSBk0P53WxM-r',
+            '_signature': '_02B4Z6wo0000119LvEwAAIDCuktNZ0y5wkdfS7jAALThuOR8D9yWNZ.EmWHKV0WSn6Px'
+                          'fPsH9-BldyxVje0f49ryXgmn7Tzk-swEHNb15TiGqa6YF.cX0jW8Eds1TtJOIZyfc9s5emH7gdWN94',
+        }
+        cookies = {
+            'ixigua-a-s': '1',
+            'msToken': 'IlG0wd0Pylyw9ghcYiB2YseUmTwrsrqqhXrbIcsSaTcLTJyVlbYJzk20zw3UO-CfrfCNVVIOB'
+                       'NjIl7vfBoxnVUwO9ZyzAI3umSKsT5-pef_RRfQCJwmA',
+            'ttwid': '1%7C_yXQeHWwLZgCsgHClOwTCdYSOt_MjdOkgnPIkpi-Sr8%7C1661241238%7Cf57d0c5ef3f1d7'
+                     '6e049fccdca1ac54887c34d1f8731c8e51a49780ff0ceab9f8',
+            'tt_scid': 'QZ4l8KXDG0YAEaMCSbADdcybdKbUfG4BC6S4OBv9lpRS5VyqYLX2bIR8CTeZeGHR9ee3',
+            'MONITOR_WEB_ID': '0a49204a-7af5-4e96-95f0-f4bafb7450ad',
+            '__ac_nonce': '06304878000964fdad287',
+            '__ac_signature': '_02B4Z6wo00f017Rcr3AAAIDCUVxeW1tOKEu0fKvAAI4cvoYzV-wBhq7B6D8k0no7lb'
+                              'FlvYoinmtK6UXjRIYPXnahUlFTvmWVtb77jsMkKAXzAEsLE56m36RlvL7ky.M3Xn52r9t1IEb7IR3ke8',
+            'ttcid': 'e56fabf6e85d4adf9e4d91902496a0e882',
+            '_tea_utm_cache_1300': 'undefined',
+            'support_avif': 'false',
+            'support_webp': 'false',
+            'xiguavideopcwebid': '7134967546256016900',
+            'xiguavideopcwebid.sig': 'xxRww5R1VEMJN_dQepHorEu_eAc',
+        }
+        urllib3.disable_warnings()
+        response = requests.get(url=url, headers=headers, params=params, cookies=cookies, verify=False)
+        if 'data' not in response.json() or response.json()['data'] == '':
+            Common.logger(log_type, crawler).warning('get_video_info: response: {}', response)
+        else:
+            video_info = response.json()['data']['gidInformation']['packerData']['video']
+            video_url_dict = {}
+            # video_url
+            if 'videoResource' not in video_info:
+                video_url_dict["video_url"] = ''
+                video_url_dict["audio_url"] = ''
+                video_url_dict["video_width"] = 0
+                video_url_dict["video_height"] = 0
+
+            elif 'dash_120fps' in video_info['videoResource']:
+                if "video_list" in video_info['videoResource']['dash_120fps'] and len(video_info['videoResource']['dash_120fps']['video_list']) != 0:
+                    video_url = video_info['videoResource']['dash_120fps']['video_list'][-1]['backup_url_1']
+                    audio_url = video_info['videoResource']['dash_120fps']['video_list'][-1]['backup_url_1']
+                    if len(video_url) % 3 == 1:
+                        video_url += '=='
+                    elif len(video_url) % 3 == 2:
+                        video_url += '='
+                    # pad the audio URL in its own chain; hanging it off the video
+                    # elif meant audio was only padded when video needed no padding
+                    if len(audio_url) % 3 == 1:
+                        audio_url += '=='
+                    elif len(audio_url) % 3 == 2:
+                        audio_url += '='
+                    video_url = base64.b64decode(video_url).decode('utf8')
+                    audio_url = base64.b64decode(audio_url).decode('utf8')
+                    video_width = video_info['videoResource']['dash_120fps']['video_list'][-1]['vwidth']
+                    video_height = video_info['videoResource']['dash_120fps']['video_list'][-1]['vheight']
+                    video_url_dict["video_url"] = video_url
+                    video_url_dict["audio_url"] = audio_url
+                    video_url_dict["video_width"] = video_width
+                    video_url_dict["video_height"] = video_height
+                elif 'dynamic_video' in video_info['videoResource']['dash_120fps'] \
+                        and 'dynamic_video_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
+                        and 'dynamic_audio_list' in video_info['videoResource']['dash_120fps']['dynamic_video'] \
+                        and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list']) != 0 \
+                        and len(video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list']) != 0:
+
+                    video_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
+                    audio_url = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
+                    if len(video_url) % 3 == 1:
+                        video_url += '=='
+                    elif len(video_url) % 3 == 2:
+                        video_url += '='
+                    if len(audio_url) % 3 == 1:
+                        audio_url += '=='
+                    elif len(audio_url) % 3 == 2:
+                        audio_url += '='
+                    video_url = base64.b64decode(video_url).decode('utf8')
+                    audio_url = base64.b64decode(audio_url).decode('utf8')
+                    video_width = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
+                    video_height = video_info['videoResource']['dash_120fps']['dynamic_video']['dynamic_video_list'][-1]['vheight']
+                    video_url_dict["video_url"] = video_url
+                    video_url_dict["audio_url"] = audio_url
+                    video_url_dict["video_width"] = video_width
+                    video_url_dict["video_height"] = video_height
+
+
+            elif 'dash' in video_info['videoResource'] \
+                    and 'dynamic_video' in video_info['videoResource']['dash'] \
+                    and 'dynamic_video_list' in video_info['videoResource']['dash']['dynamic_video']:
+                video_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['backup_url_1']
+                audio_url = video_info['videoResource']['dash']['dynamic_video']['dynamic_audio_list'][-1]['backup_url_1']
+                if len(video_url) % 3 == 1:
+                    video_url += '=='
+                elif len(video_url) % 3 == 2:
+                    video_url += '='
+                if len(audio_url) % 3 == 1:
+                    audio_url += '=='
+                elif len(audio_url) % 3 == 2:
+                    audio_url += '='
+                video_url = base64.b64decode(video_url).decode('utf8')
+                audio_url = base64.b64decode(audio_url).decode('utf8')
+                video_width = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vwidth']
+                video_height = video_info['videoResource']['dash']['dynamic_video']['dynamic_video_list'][-1]['vheight']
+                video_url_dict["video_url"] = video_url
+                video_url_dict["audio_url"] = audio_url
+                video_url_dict["video_width"] = video_width
+                video_url_dict["video_height"] = video_height
+
+            elif 'normal' in video_info['videoResource']:
+                video_url = video_info['videoResource']['normal']['video_list'][-1]['backup_url_1']
+                audio_url = video_info['videoResource']['normal']['video_list'][-1]['backup_url_1']
+                if len(video_url) % 3 == 1:
+                    video_url += '=='
+                elif len(video_url) % 3 == 2:
+                    video_url += '='
+                if len(audio_url) % 3 == 1:
+                    audio_url += '=='
+                elif len(audio_url) % 3 == 2:
+                    audio_url += '='
+                video_url = base64.b64decode(video_url).decode('utf8')
+                audio_url = base64.b64decode(audio_url).decode('utf8')
+                video_width = video_info['videoResource']['normal']['video_list'][-1]['vwidth']
+                video_height = video_info['videoResource']['normal']['video_list'][-1]['vheight']
+                video_url_dict["video_url"] = video_url
+                video_url_dict["audio_url"] = audio_url
+                video_url_dict["video_width"] = video_width
+                video_url_dict["video_height"] = video_height
+            else:
+                # unrecognised resource layout: return empty values instead of
+                # leaving video_url_dict unpopulated
+                video_url_dict["video_url"] = ''
+                video_url_dict["audio_url"] = ''
+                video_url_dict["video_width"] = 0
+                video_url_dict["video_height"] = 0
+
+            return video_url_dict
+
+
+        # except Exception as e:
+        #     Common.logger(log_type).error(f'get_video_info异常:{e}\n')
+
+    @classmethod
+    def get_videolist(cls, log_type, crawler, out_uid, machine):
+        signature = cls.get_signature(log_type, crawler, out_uid, machine)
+        url = "https://www.ixigua.com/api/videov2/author/new_video_list?"
+        params = {
+            'to_user_id': str(out_uid),
+            'offset': str(cls.offset),
+            'limit': '30',
+            'maxBehotTime': '0',
+            'order': 'new',
+            'isHome': '0',
+            'msToken': 'G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==',
+            'X-Bogus': 'DFSzswVuEkUANjW9ShFTgR/F6qHt',
+            '_signature': signature,
+        }
+        headers = {
+            'authority': 'www.ixigua.com',
+            'accept': 'application/json, text/plain, */*',
+            'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
+            'cache-control': 'no-cache',
+            'cookie': f'MONITOR_WEB_ID=7168304743566296612; __ac_signature={signature}; ixigua-a-s=1; support_webp=true; support_avif=false; csrf_session_id=a5355d954d3c63ed1ba35faada452b4d; msToken=G0eRzNkw189a8TLaXjc6nTHVMQwh9XcxVAqTbGKi7iPJdQcLwS3-XRrJ3MZ7QBfqErpxp3EX1WtvWOIcZ3NIgr41hgcd-v64so_RRj3YCRw1UsKW8mIssNLlIMspsg==; tt_scid=o4agqz7u9SKPwfBoPt6S82Cw0q.9KDtqmNe0JHxMqmpxNHQWq1BmrQdgVU6jEoX7ed99; ttwid=1%7CHHtv2QqpSGuSu8r-zXF1QoWsvjmNi1SJrqOrZzg-UCY%7C1676618894%7Cee5ad95378275f282f230a7ffa9947ae7eff40d0829c5a2568672a6dc90a1c96; ixigua-a-s=1',
+            'pragma': 'no-cache',
+            'referer': f'https://www.ixigua.com/home/{out_uid}/video/?preActiveKey=hotsoon&list_entrance=userdetail',
+            'sec-ch-ua': '"Chromium";v="110", "Not A(Brand";v="24", "Microsoft Edge";v="110"',
+            'sec-ch-ua-mobile': '?0',
+            'sec-ch-ua-platform': '"macOS"',
+            'sec-fetch-dest': 'empty',
+            'sec-fetch-mode': 'cors',
+            'sec-fetch-site': 'same-origin',
+            'user-agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.0.0 Safari/537.36 Edg/110.0.1587.41',
+            'x-secsdk-csrf-token': '00010000000119e3f9454d1dcbb288704cda1960f241e2d19bd21f2fd283520c3615a990ac5a17448bfbb902a249'
+        }
+        urllib3.disable_warnings()
+        response = requests.get(url=url, headers=headers, params=params, proxies=proxies, verify=False)
+        cls.offset += 30
+        if response.status_code != 200:
+            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+        elif 'data' not in response.text:
+            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.text}\n")
+        elif 'videoList' not in response.json()["data"]:
+            Common.logger(log_type, crawler).warning(f"get_videolist_response:{response.json()}\n")
+        else:
+            videoList = response.json()['data']['videoList']
+            for i in range(len(videoList)):
+                # video_title
+                if 'title' not in videoList[i]:
+                    video_title = 0
+                else:
+                    video_title = videoList[i]['title'].strip().replace('手游', '') \
+                        .replace('/', '').replace('\n', '')
+
+                # video_id
+                if 'video_id' not in videoList[i]:
+                    video_id = 0
+                else:
+                    video_id = videoList[i]['video_id']
+
+                # gid
+                if 'gid' not in videoList[i]:
+                    gid = 0
+                else:
+                    gid = videoList[i]['gid']
+
+                # play_cnt
+                if 'video_detail_info' not in videoList[i]:
+                    play_cnt = 0
+                elif 'video_watch_count' not in videoList[i]['video_detail_info']:
+                    play_cnt = 0
+                else:
+                    play_cnt = videoList[i]['video_detail_info']['video_watch_count']
+
+                # comment_cnt
+                if 'comment_count' not in videoList[i]:
+                    comment_cnt = 0
+                else:
+                    comment_cnt = videoList[i]['comment_count']
+
+                # like_cnt
+                if 'digg_count' not in videoList[i]:
+                    like_cnt = 0
+                else:
+                    like_cnt = videoList[i]['digg_count']
+
+                # share_cnt
+                share_cnt = 0
+
+                # video_duration
+                if 'video_duration' not in videoList[i]:
+                    video_duration = 0
+                else:
+                    video_duration = videoList[i]['video_duration']
+
+                # send_time
+                if 'publish_time' not in videoList[i]:
+                    publish_time = 0
+                else:
+                    publish_time = videoList[i]['publish_time']
+
+                # is_top
+                if 'is_top' not in videoList[i]:
+                    is_top = 0
+                else:
+                    is_top = videoList[i]['is_top']
+
+                # user_name
+                if 'user_info' not in videoList[i]:
+                    user_name = 0
+                elif 'name' not in videoList[i]['user_info']:
+                    user_name = 0
+                else:
+                    user_name = videoList[i]['user_info']['name']
+
+                # user_id
+                if 'user_info' not in videoList[i]:
+                    user_id = 0
+                elif 'user_id' not in videoList[i]['user_info']:
+                    user_id = 0
+                else:
+                    user_id = videoList[i]['user_info']['user_id']
+
+                # avatar_url
+                if 'user_info' not in videoList[i]:
+                    avatar_url = 0
+                elif 'avatar_url' not in videoList[i]['user_info']:
+                    avatar_url = 0
+                else:
+                    avatar_url = videoList[i]['user_info']['avatar_url']
+
+                # cover_url
+                if 'video_detail_info' not in videoList[i]:
+                    cover_url = 0
+                elif 'detail_video_large_image' not in videoList[i]['video_detail_info']:
+                    cover_url = 0
+                elif 'url' in videoList[i]['video_detail_info']['detail_video_large_image']:
+                    cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url']
+                else:
+                    cover_url = videoList[i]['video_detail_info']['detail_video_large_image']['url_list'][0]['url']
+
+                Common.logger(log_type, crawler).info(
+                    f'send_time:{time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(publish_time))}')
+
+                video_url_dict = cls.get_video_url(log_type, crawler, gid)
+                video_url = video_url_dict["video_url"]
+                audio_url = video_url_dict["audio_url"]
+                video_width = video_url_dict["video_width"]
+                video_height = video_url_dict["video_height"]
+
+                video_dict = {'video_title': video_title,
+                              'video_id': video_id,
+                              'gid': gid,
+                              'play_cnt': play_cnt,
+                              'comment_cnt': comment_cnt,
+                              'like_cnt': like_cnt,
+                              'share_cnt': share_cnt,
+                              'video_width': video_width,
+                              'video_height': video_height,
+                              'video_duration': video_duration,
+                              'publish_time': publish_time,
+                              'is_top': is_top,
+                              'user_name': user_name,
+                              'user_id': user_id,
+                              'avatar_url': avatar_url,
+                              'cover_url': cover_url,
+                              'audio_url': audio_url,
+                              'video_url': video_url}
+                for k, v in video_dict.items():
+                    print(f"{k}:{v}")
+                print("\n")
+
+
+
+
+if __name__ == '__main__':
+    # print(Follow.get_signature("follow", "xigua", "95420624045", "local"))
+    Follow.get_videolist("follow", "xigua", "95420624045", "local")
+
+
+    pass

+ 3 - 0
xigua/xigua_main/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17

+ 3 - 0
xigua/xigua_main/run_xigua_follow.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/2/17

+ 22 - 15
youtube/youtube_follow/youtube_follow.py

@@ -337,6 +337,8 @@ class Follow:
                                 out_fans = header['subscriberCountText']['accessibility']['accessibilityData']['label']
                                 if '万' in out_fans:
                                     out_fans = int(float(out_fans.split('万')[0])*10000)
+                                elif "位" in out_fans:
+                                    out_fans = int(out_fans.split('位')[0].replace(",", ""))
                                 else:
                                     pass
 
@@ -421,14 +423,14 @@ class Follow:
                         }
                         our_uid = Users.create_user(log_type, crawler, create_user_dict, env)
                         Common.logger(log_type, crawler).info(f'新创建的站内UID:{our_uid}')
-                        if env == 'prod':
-                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
-                        else:
+                        if env == 'dev':
                             our_user_link = f'https://testadmin.piaoquantv.com/ums/user/{our_uid}/post'
+                        else:
+                            our_user_link = f'https://admin.piaoquantv.com/ums/user/{our_uid}/post'
                         Common.logger(log_type, crawler).info(f'站内用户主页链接:{our_user_link}')
                         Feishu.update_values(log_type, crawler, sheetid, f'G{i + 1}:H{i + 1}', [[our_uid, our_user_link]])
                         Common.logger(log_type, crawler).info(f'站内用户信息写入飞书成功!')
-                        Common.logger(log_type, crawler).info(f'sql:{sql}')
+
                         sql = f""" insert into crawler_user(user_id, 
                                             out_user_id, 
                                             out_user_name, 
@@ -447,6 +449,7 @@ class Follow:
                                             {out_fans}, 
                                             "{cls.platform}",
                                             "{tag}") """
+                        Common.logger(log_type, crawler).info(f'sql:{sql}')
                         MysqlHelper.update_values(log_type, crawler, sql, env, machine)
                         Common.logger(log_type, crawler).info('用户信息插入数据库成功!\n')
                     # 数据库中(youtube + out_user_id)返回数量 != 0,则直接把数据库中的站内 UID 写入飞书
@@ -699,10 +702,10 @@ class Follow:
                         video_dict = cls.get_video_info(log_type, crawler, out_uid, video_id, machine)
                        # 发布时间<=180天
                         publish_time = int(time.mktime(time.strptime(video_dict['publish_time'], "%Y-%m-%d")))
-                        if int(time.time()) - publish_time <= 3600*24*30:
+                        if int(time.time()) - publish_time <= 3600*24*180:
                             cls.download_publish(log_type, crawler, video_dict, strategy, our_uid, env, oss_endpoint, machine)
                         else:
-                            Common.logger(log_type, crawler).info('发布时间超过30天\n')
+                            Common.logger(log_type, crawler).info('发布时间超过180天\n')
                             return
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
@@ -912,7 +915,9 @@ class Follow:
                 if 'title' not in  videoDetails:
                     video_title = ''
                 else:
-                    video_title = videoDetails['title']
+                    video_title = videoDetails['title'].replace("&", "").strip().replace("\n", "") \
+                            .replace("/", "").replace("\r", "").replace("#", "") \
+                            .replace(".", "。").replace("\\", "").replace("&NBSP", "")
                 video_title = cls.filter_emoji(video_title)
                 # if Translate.is_contains_chinese(video_title) is False:
                 video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
@@ -1008,7 +1013,7 @@ class Follow:
             # repeat_video = MysqlHelper.get_values(log_type, crawler, sql, env, machine)
             if video_dict['video_title'] == '' or  video_dict['video_url'] == '':
                 Common.logger(log_type, crawler).info('无效视频\n')
-            elif video_dict['duration'] > 600 or video_dict['duration'] < 60:
+            elif video_dict['duration'] > 1200 or video_dict['duration'] < 60:
                 Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足规则\n")
             # elif repeat_video is not None and len(repeat_video) != 0:
             elif cls.repeat_video(log_type, crawler, video_dict['video_id'], env, machine) != 0:
@@ -1020,16 +1025,18 @@ class Follow:
                 Common.logger(log_type, crawler).info('开始下载视频...')
                 # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
                 Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
-                ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
-                video_width = int(ffmpeg_dict['width'])
-                video_height = int(ffmpeg_dict['height'])
-                duration = int(ffmpeg_dict['duration'])
-                video_size = int(ffmpeg_dict['size'])
+                # ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
+                # video_width = int(ffmpeg_dict['width'])
+                video_width = 1280
+                # video_height = int(ffmpeg_dict['height'])
+                video_height = 720
+                duration = int(video_dict['duration'])
+                # video_size = int(ffmpeg_dict['size'])
 
                 Common.logger(log_type, crawler).info(f'video_width:{video_width}')
                 Common.logger(log_type, crawler).info(f'video_height:{video_height}')
                 Common.logger(log_type, crawler).info(f'duration:{duration}')
-                Common.logger(log_type, crawler).info(f'video_size:{video_size}\n')
+                # Common.logger(log_type, crawler).info(f'video_size:{video_size}\n')
 
                 video_dict['video_width'] = video_width
                 video_dict['video_height'] = video_height
@@ -1045,7 +1052,7 @@ class Follow:
                 #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
                 #     Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
                 #     return
-                if video_size == 0 or duration == 0 or video_size is None or duration is None:
+                if duration == 0 or duration is None:
                     # 删除视频文件夹
                     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
                     Common.logger(log_type, crawler).info(f"视频下载出错,删除成功\n")