2 سال پیش · 5f9a8dbe5b
--- a/.DS_Store
+++ b/.DS_Store
--- a/common/common.py
+++ b/common/common.py
@@ -139,6 +139,20 @@ class Common:
 
				             except Exception as e:
			
 
				                 cls.logger(log_type, crawler).error(f"视频下载失败：{e}\n")
			
 
				 
			
 
				+        elif text == "youtube_video":
			
 
				+            # 需要下载的视频地址
			
 
				+            video_url = url
			
 
				+            # 视频名
			
 
				+            video_name = "video.mp4"
			
 
				+            try:
			
 
				+                download_cmd = f"yt-dlp -f 'bv[height=720][ext=mp4]+ba[ext=m4a]' --merge-output-format mp4 {video_url} -o {video_name}"
			
 
				+                os.system(download_cmd)
			
 
				+                move_cmd = f"mv {video_name} {video_dir}"
			
 
				+                os.system(move_cmd)
			
 
				+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
			
 
				+            except Exception as e:
			
 
				+                Common.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
			
 
				+
			
 
				         # 下载音频
			
 
				         elif text == "audio":
			
 
				             # 需要下载的视频地址
			
--- a/common/feishu.py
+++ b/common/feishu.py
@@ -66,6 +66,8 @@ class Feishu:
 
				     crawler_youtube = 'https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?'
			
 
				     # 微信指数
			
 
				     weixinzhishu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?'
			
 
				+    # 微信指数_搜索词
			
 
				+    weixinzhishu_search_word = 'https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?'
			
 
				 
			
 
				     # 手机号
			
 
				     wangkun = "13426262515"
			
@@ -127,6 +129,8 @@ class Feishu:
 
				             return 'shtcnrLyr1zbYbhhZyqpN7Xrd5f'
			
 
				         elif crawler == 'weixinzhishu':
			
 
				             return 'shtcnqhMRUGunIfGnGXMOBYiy4K'
			
 
				+        elif crawler == 'weixinzhishu_search_word':
			
 
				+            return 'shtcnHxCj6dZBYMuK1Q3tIJVlqg'
			
 
				 
			
 
				     # 获取飞书api token
			
 
				     @classmethod
			
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,7 @@ oss2==2.15.0
 
				 psutil==5.9.2
			
 
				 PyMySQL==1.0.2
			
 
				 requests==2.27.1
			
 
				-selenium==4.8.0
			
 
				+selenium~=4.2.0
			
 
				 urllib3==1.26.9
			
 
				+emoji~=2.2.0
			
 
				+Appium-Python-Client~=2.8.1
			
--- a/weixinzhishu/.DS_Store
+++ b/weixinzhishu/.DS_Store
--- a/weixinzhishu/weixinzhishu_chlsfiles/charles202302131147.txt
+++ b/weixinzhishu/weixinzhishu_chlsfiles/charles202302131147.txt
--- a/weixinzhishu/weixinzhishu_main/demo.py
+++ b/weixinzhishu/weixinzhishu_main/demo.py
@@ -3,41 +3,35 @@
 
				 # @Time: 2023/2/13
			
 
				 import json
			
 
				 import os
			
 
				+from datetime import date, timedelta
			
 
				 
			
 
				 
			
 
				 class Demo:
			
 
				     @classmethod
			
 
				-    def demo1(cls):
			
 
				-        # charles 抓包文件保存目录
			
 
				-        chlsfile_path = f"../weixinzhishu_chlsfiles/"
			
 
				-        if len(os.listdir(chlsfile_path)) == 0:
			
 
				-            print("chlsfile文件夹为空")
			
 
				-        else:
			
 
				-            print(f"chlsfile_list:{sorted(os.listdir(chlsfile_path))}")
			
 
				-            # 获取最新的 chlsfile
			
 
				-            chlsfile = sorted(os.listdir(chlsfile_path))[-1]
			
 
				-            # 分离文件名与扩展名
			
 
				-            new_file = os.path.splitext(chlsfile)
			
 
				-
			
 
				-            # 重命名文件后缀
			
 
				-            os.rename(os.path.join(chlsfile_path, chlsfile),
			
 
				-                      os.path.join(chlsfile_path, new_file[0] + ".txt"))
			
 
				-
			
 
				-            with open(f"{chlsfile_path}{new_file[0]}.txt", encoding='utf-8-sig', errors='ignore') as f:
			
 
				-                contents = json.load(f, strict=False)
			
 
				-
			
 
				-            if "search.weixin.qq.com" not in [text['host'] for text in contents]:
			
 
				-                return "未找到search_key"
			
 
				-            else:
			
 
				-                for content in contents:
			
 
				-                    if content["host"] == "search.weixin.qq.com" and content[
			
 
				-                        "path"] == "/cgi-bin/wxaweb/wxindexgetusergroup":
			
 
				-                        print(f"content:{content}")
			
 
				-                        text = content['request']['body']['text']
			
 
				-                        search_key = json.loads(text)['search_key']
			
 
				-                        openid = json.loads(text)['openid']
			
 
				-                        return search_key, openid
			
 
				+    def test_time(cls):
				+        """Print the two date formats this scratch module experiments with.
				+
				+        First prints the hard-coded day 20230207 re-formatted as
				+        ``YYYY-MM-DD``; then prints the date seven days before today
				+        formatted as ``YYYYMMDD``. Returns nothing.
				+        """
				+        raw_day = str(20230207)
				+        year, month, day = raw_day[:4], raw_day[4:6], raw_day[6:]
				+        print(f"{year}-{month}-{day}")
				+
				+        week_ago = date.today() - timedelta(days=7)
				+        print(week_ago.strftime("%Y%m%d"))
			
 
				 
			
 
				 
			
 
				+dict2 = {'id': 1,
			
 
				+         'word': '消息',
			
 
				+         'wechatScores': [{'score': 95521022, 'scoreDate': '2023-02-07'},
			
 
				+                          {'score': 97315283, 'scoreDate': '2023-02-08'},
			
 
				+                          {'score': 109845849, 'scoreDate': '2023-02-09'},
			
 
				+                          {'score': 107089560, 'scoreDate': '2023-02-10'},
			
 
				+                          {'score': 102658391, 'scoreDate': '2023-02-11'},
			
 
				+                          {'score': 93843701, 'scoreDate': '2023-02-12'},
			
 
				+                          {'score': 100211894, 'scoreDate': '2023-02-13'}]}
			
 
				+
			
 
				+response = {'code': -10002, 'content': {'resp_list': []}}
			
 
				+
			
 
				+dict3 = {'id':1, 'word': '出大', 'wechatScores': []}
			
 
				+
			
 
				 if __name__ == "__main__":
			
 
				-    print(Demo.demo1())
			
 
				+
			
 
				+    Demo.test_time()
			
 
				+
			
 
				+    pass
			
--- a/weixinzhishu/weixinzhishu_main/weixinzhishu.py
+++ b/weixinzhishu/weixinzhishu_main/weixinzhishu.py
@@ -1,12 +1,15 @@
 
				 # -*- coding: utf-8 -*-
			
 
				 # @Author: wangkun
			
 
				 # @Time: 2023/2/10
			
 
				+import os
			
 
				+import sys
			
 
				+import time
			
 
				 from datetime import date, timedelta
			
 
				-
			
 
				 import requests
			
 
				 import json
			
 
				-
			
 
				+sys.path.append(os.getcwd())
			
 
				 from common.feishu import Feishu
			
 
				+from common.common import Common
			
 
				 
			
 
				 
			
 
				 class Weixinzhishu:
			
@@ -16,26 +19,23 @@ class Weixinzhishu:
 
				 
			
 
				     @classmethod
			
 
				     def wechat_key(cls, log_type, crawler):
			
 
				-        sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
			
 
				-        for i in range(len(sheet)):
			
 
				-            search_key = sheet[1][1]
			
 
				-            openid = sheet[1][2]
			
 
				-            return search_key, openid
			
 
				+        try:
			
 
				+            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
			
 
				+            for i in range(len(sheet)):
			
 
				+                search_key = sheet[1][1]
			
 
				+                openid = sheet[1][2]
			
 
				+                return search_key, openid
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
			
 
				 
			
 
				     @classmethod
			
 
				-    def weixinzhishu(cls, log_type, crawler):
			
 
				-        search_word_list = cls.search_word()
			
 
				-        wechat_key = cls.wechat_key(log_type, crawler)
			
 
				-        search_key = wechat_key[0]
			
 
				-        openid = wechat_key[-1]
			
 
				-        start_ymd = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d").replace("-", "")
			
 
				-        end_ymd = (date.today() + timedelta(days=-8)).strftime("%Y-%m-%d").replace("-", "")
			
 
				-        print(f"search_key:{search_key}")
			
 
				-        print(f"openid:{openid}")
			
 
				-        print(f"start_ymd:{start_ymd}")
			
 
				-        print(f"start_ymd:{end_ymd}")
			
 
				-        for word in search_word_list:
			
 
				-            print(f"word:{word}")
			
 
				+    def weixinzhishu(cls, log_type, crawler, word_id, word):
			
 
				+        try:
			
 
				+            wechat_key = cls.wechat_key(log_type, crawler)
			
 
				+            search_key = wechat_key[0]
			
 
				+            openid = wechat_key[-1]
			
 
				+            end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
			
 
				+            start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
			
 
				             url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
			
 
				             payload = json.dumps({
			
 
				                 "openid": openid,
			
@@ -52,13 +52,89 @@ class Weixinzhishu:
 
				                 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
			
 
				             }
			
 
				             response = requests.request("POST", url, headers=headers, data=payload)
			
 
				-            if response.json()['code'] == -10000:
			
 
				-                print(response.text)
			
 
				+            if response.json()['code'] != 0 and response.json()['code'] != -10002:
			
 
				+                Common.logger(log_type, crawler).warning(f"response:{response.text}\n")
			
 
				+            elif response.json()['code'] == -10002:
			
 
				+                # 数据写入飞书
			
 
				+                now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
			
 
				+                values = [[now, word, "该词暂未收录"]]
			
 
				+                Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
			
 
				+                time.sleep(0.5)
			
 
				+                Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
			
 
				+                Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
			
 
				+
			
 
				+                word_wechat_score_dict = {
			
 
				+                    "id": word_id,
			
 
				+                    "word": word,
			
 
				+                    "wechatScores": [],
			
 
				+                }
			
 
				+                # print(word_wechat_score_dict)
			
 
				+                return word_wechat_score_dict
			
 
				             else:
			
 
				-                print(response.text)
			
 
				                 time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
			
 
				-                print(time_index)
			
 
				+                wechat_score_list = []
			
 
				+                for i in range(len(time_index)):
			
 
				+                    score_time = time_index[i]['time']
			
 
				+                    score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
			
 
				+                    score = time_index[i]['score']
			
 
				+                    wechat_score_dict = {"score": score, "scoreDate": score_time_str}
			
 
				+                    wechat_score_list.append(wechat_score_dict)
			
 
				+
			
 
				+                    # 数据写入飞书
			
 
				+                    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
			
 
				+                    values = [[now, word, score_time_str, score]]
			
 
				+                    Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
			
 
				+                    time.sleep(0.5)
			
 
				+                    Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
			
 
				+                    Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
			
 
				+
			
 
				+                word_wechat_score_dict = {
			
 
				+                    "id": word_id,
			
 
				+                    "word": word,
			
 
				+                    "wechatScores": wechat_score_list,
			
 
				+                }
			
 
				+                # print(word_wechat_score_dict)
			
 
				+                return word_wechat_score_dict
			
 
				+        except Exception as e:
			
 
				+            Common.logger(log_type, crawler).error(f"weixinzhishu异常:{e}\n")
			
 
				+
			
 
				+    @classmethod
			
 
				+    def get_weixinzhishu(cls, log_type, crawler, max_words=100):
				+        """Query the WeChat index for the search words stored in Feishu.
				+
				+        Words are read from sheets "nCudsM" and "D80uEf" (in that order) of
				+        the 'weixinzhishu_search_word' workbook, ``None`` cells are dropped,
				+        and ``cls.weixinzhishu`` is called once per word with a 1-based id.
				+
				+        :param log_type: forwarded to Feishu, Common.logger and cls.weixinzhishu
				+        :param crawler: forwarded to Common.logger and cls.weixinzhishu
				+        :param max_words: upper bound on the number of words queried per run;
				+            defaults to 100, the cap the loop previously hard-coded.
				+            Pass ``None`` to query every word.
				+        :return: ``{"data": [...]}`` holding one ``cls.weixinzhishu`` result
				+            per queried word, in sheet order
				+        """
				+        word_list = []
				+        for sheet_id in ("nCudsM", "D80uEf"):
				+            sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', sheet_id)
				+            # Flatten the rows and skip empty cells.
				+            word_list.extend(cell for row in sheet for cell in row if cell is not None)
				+
				+        word_score_list = []
				+        # Slice instead of range(100): with fewer than max_words words the old
				+        # loop indexed past the end of word_list and raised IndexError.
				+        for word_id, word in enumerate(word_list[:max_words], start=1):
				+            # NOTE(review): the result is appended as returned, whatever
				+            # cls.weixinzhishu yields for a failed lookup - confirm consumers
				+            # of "data" tolerate that.
				+            word_score = cls.weixinzhishu(log_type, crawler, word_id, word)
				+            word_score_list.append(word_score)
				+            Common.logger(log_type, crawler).info(f'"{word}"微信指数：{word_score}\n')
				+
				+        return {"data": word_score_list}
			
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu')
			
 
				+    # word_dict = Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu', 1, "出大")
			
 
				+    # print(word_dict)
			
 
				+
			
 
				+    word_dict_demo = Weixinzhishu.get_weixinzhishu('weixin', 'weixinzhishu')
			
 
				+    print(word_dict_demo)
			
 
				+
			
 
				+    pass
			
--- a/youtube/youtube_follow/youtube_follow.py
+++ b/youtube/youtube_follow/youtube_follow.py
@@ -7,10 +7,12 @@ YouTube 定向榜
 
				     2. 10分钟>=时长>=1分钟
			
 
				 """
			
 
				 import os
			
 
				+import re
			
 
				 import shutil
			
 
				 import sys
			
 
				 import time
			
 
				 import json
			
 
				+# import emoji
			
 
				 import requests
			
 
				 from selenium import webdriver
			
 
				 from selenium.webdriver.chrome.service import Service
			
@@ -701,6 +703,15 @@ class Follow:
 
				         except Exception as e:
			
 
				             Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
			
 
				 
			
 
				+    @classmethod
			
 
				+    def filter_emoji(cls, title):
				+        """Return ``title`` with every supplementary-plane character removed.
				+
				+        Characters in U+10000..U+10FFFF are deleted, which covers most emoji.
				+        Characters in the Basic Multilingual Plane are kept unchanged, and
				+        any non-emoji character above U+FFFF is removed as well.
				+
				+        :param title: the text to clean (a ``str``)
				+        :return: the cleaned text
				+        """
				+        # On Python 3 this character range always compiles, so the former
				+        # ``except re.error`` fallback (a surrogate-pair pattern meant for
				+        # narrow Python 2 builds) was unreachable and has been dropped.
				+        return re.sub('[\U00010000-\U0010ffff]', "", title)
			
 
				+
			
 
				     @classmethod
			
 
				     def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
			
 
				         try:
			
@@ -891,15 +902,16 @@ class Follow:
 
				             else:
			
 
				                 playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
			
 
				                 videoDetails = response.json()['videoDetails']
			
 
				-                streamingData = response.json()['streamingData']
			
 
				+                # streamingData = response.json()['streamingData']
			
 
				 
			
 
				                 # video_title
			
 
				                 if 'title' not in  videoDetails:
			
 
				                     video_title = ''
			
 
				                 else:
			
 
				                     video_title = videoDetails['title']
			
 
				-                if Translate.is_contains_chinese(video_title) is False:
			
 
				-                    video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
			
 
				+                video_title = cls.filter_emoji(video_title)
			
 
				+                # if Translate.is_contains_chinese(video_title) is False:
			
 
				+                video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
			
 
				 
			
 
				                 if 'lengthSeconds' not in videoDetails:
			
 
				                     duration = 0
			
@@ -945,14 +957,15 @@ class Follow:
 
				                     cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
			
 
				 
			
 
				                 # video_url
			
 
				-                if 'formats' not in streamingData:
			
 
				-                    video_url = ''
			
 
				-                elif len(streamingData['formats']) == 0:
			
 
				-                    video_url = ''
			
 
				-                elif 'url' not in streamingData['formats'][-1]:
			
 
				-                    video_url = ''
			
 
				-                else:
			
 
				-                    video_url = streamingData['formats'][-1]['url']
			
 
				+                # if 'formats' not in streamingData:
			
 
				+                #     video_url = ''
			
 
				+                # elif len(streamingData['formats']) == 0:
			
 
				+                #     video_url = ''
			
 
				+                # elif 'url' not in streamingData['formats'][-1]:
			
 
				+                #     video_url = ''
			
 
				+                # else:
			
 
				+                #     video_url = streamingData['formats'][-1]['url']
			
 
				+                video_url = f"https://www.youtube.com/watch?v={video_id}"
			
 
				 
			
 
				                 Common.logger(log_type, crawler).info(f'video_title:{video_title}')
			
 
				                 Common.logger(log_type, crawler).info(f'video_id:{video_id}')
			
@@ -994,7 +1007,8 @@ class Follow:
 
				             else:
			
 
				                 # 下载视频
			
 
				                 Common.logger(log_type, crawler).info('开始下载视频...')
			
 
				-                Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
			
 
				+                # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
			
 
				+                Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
			
 
				                 ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
			
 
				                 video_width = int(ffmpeg_dict['width'])
			
 
				                 video_height = int(ffmpeg_dict['height'])
			
@@ -1015,12 +1029,12 @@ class Follow:
 
				                 video_dict['avatar_url'] = video_dict['cover_url']
			
 
				                 video_dict['session'] = f'youtube{int(time.time())}'
			
 
				                 rule='1,2'
			
 
				-                if duration < 60 or duration > 600:
			
 
				-                    # 删除视频文件夹
			
 
				-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
			
 
				-                    Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则，删除成功\n")
			
 
				-                    return
			
 
				-                elif video_size == 0 or duration == 0 or video_size is None or duration is None:
			
 
				+                # if duration < 60 or duration > 600:
			
 
				+                #     # 删除视频文件夹
			
 
				+                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
			
 
				+                #     Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则，删除成功\n")
			
 
				+                #     return
			
 
				+                if video_size == 0 or duration == 0 or video_size is None or duration is None:
			
 
				                     # 删除视频文件夹
			
 
				                     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
			
 
				                     Common.logger(log_type, crawler).info(f"视频下载出错，删除成功\n")
			
@@ -1120,9 +1134,10 @@ class Follow:
 
				 
			
 
				 
			
 
				 if __name__ == "__main__":
			
 
				-    print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
			
 
				+    # print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
			
 
				     # print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
			
 
				     # Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
			
 
				     # Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
			
 
				     # Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
			
 
				+    print(Follow.filter_emoji("姐妹倆一唱一和，完美配合，終於把大慶降服了😅😅#萌娃搞笑日常"))
			
 
				     pass