2 年之前 · 5f9a8dbe5b
--- a/.DS_Store
+++ b/.DS_Store
--- a/common/common.py
+++ b/common/common.py
@@ -139,6 +139,20 @@ class Common:
 
															             except Exception as e:
														
 
															                 cls.logger(log_type, crawler).error(f"视频下载失败：{e}\n")
														
 
															+        elif text == "youtube_video":
															+            # Imported locally because the module's import block is not part of this hunk.
															+            import shutil
															+            import subprocess
															+
															+            # Address of the video to download
															+            video_url = url
															+            # File name yt-dlp writes into the current working directory
															+            video_name = "video.mp4"
															+            try:
															+                # Argument list instead of a shell string: the URL is handed to yt-dlp
															+                # verbatim, so spaces or shell metacharacters cannot split or inject
															+                # commands. "--" stops a URL starting with "-" being read as an option.
															+                download_cmd = [
															+                    "yt-dlp",
															+                    "-f", "bv[height=720][ext=mp4]+ba[ext=m4a]",
															+                    "--merge-output-format", "mp4",
															+                    "-o", video_name,
															+                    "--", video_url,
															+                ]
															+                # check=True raises CalledProcessError on a non-zero exit status.
															+                # os.system only returned the status, so a failed download was
															+                # still logged as finished and the except branch never ran.
															+                subprocess.run(download_cmd, check=True)
															+                # Same effect as "mv video.mp4 <video_dir>", including overwriting
															+                # an existing file, but safe for directory names containing spaces.
															+                if os.path.isdir(video_dir):
															+                    destination = os.path.join(video_dir, video_name)
															+                else:
															+                    destination = video_dir
															+                shutil.move(video_name, destination)
															+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
															+            except Exception as e:
															+                cls.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
														
 
															+
														
 
															         # 下载音频
														
 
															         elif text == "audio":
														
 
															             # 需要下载的视频地址
														
--- a/common/feishu.py
+++ b/common/feishu.py
@@ -66,6 +66,8 @@ class Feishu:
 
															     crawler_youtube = 'https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?'
														
 
															     # 微信指数
														
 
															     weixinzhishu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?'
														
 
															+    # 微信指数_搜索词
														
 
															+    weixinzhishu_search_word = 'https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?'
														
 
															     # 手机号
														
 
															     wangkun = "13426262515"
														
@@ -127,6 +129,8 @@ class Feishu:
 
															             return 'shtcnrLyr1zbYbhhZyqpN7Xrd5f'
														
 
															         elif crawler == 'weixinzhishu':
														
 
															             return 'shtcnqhMRUGunIfGnGXMOBYiy4K'
														
 
															+        elif crawler == 'weixinzhishu_search_word':
														
 
															+            return 'shtcnHxCj6dZBYMuK1Q3tIJVlqg'
														
 
															     # 获取飞书api token
														
 
															     @classmethod
														
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,5 +5,7 @@ oss2==2.15.0
 
															 psutil==5.9.2
														
 
															 PyMySQL==1.0.2
														
 
															 requests==2.27.1
														
 
															-selenium==4.8.0
														
 
															+selenium~=4.2.0
														
 
															 urllib3==1.26.9
														
 
															+emoji~=2.2.0
														
 
															+Appium-Python-Client~=2.8.1
														
--- a/weixinzhishu/.DS_Store
+++ b/weixinzhishu/.DS_Store
--- a/weixinzhishu/weixinzhishu_chlsfiles/charles202302131147.txt
+++ b/weixinzhishu/weixinzhishu_chlsfiles/charles202302131147.txt
--- a/weixinzhishu/weixinzhishu_main/demo.py
+++ b/weixinzhishu/weixinzhishu_main/demo.py
@@ -3,41 +3,35 @@
 
															 # @Time: 2023/2/13
														
 
															 import json
														
 
															 import os
														
 
															+from datetime import date, timedelta
														
 
															 class Demo:
														
 
															     @classmethod
														
 
															-    def demo1(cls):
														
 
															-        # charles 抓包文件保存目录
														
 
															-        chlsfile_path = f"../weixinzhishu_chlsfiles/"
														
 
															-        if len(os.listdir(chlsfile_path)) == 0:
														
 
															-            print("chlsfile文件夹为空")
														
 
															-        else:
														
 
															-            print(f"chlsfile_list:{sorted(os.listdir(chlsfile_path))}")
														
 
															-            # 获取最新的 chlsfile
														
 
															-            chlsfile = sorted(os.listdir(chlsfile_path))[-1]
														
 
															-            # 分离文件名与扩展名
														
 
															-            new_file = os.path.splitext(chlsfile)
														
 
															-
														
 
															-            # 重命名文件后缀
														
 
															-            os.rename(os.path.join(chlsfile_path, chlsfile),
														
 
															-                      os.path.join(chlsfile_path, new_file[0] + ".txt"))
														
 
															-
														
 
															-            with open(f"{chlsfile_path}{new_file[0]}.txt", encoding='utf-8-sig', errors='ignore') as f:
														
 
															-                contents = json.load(f, strict=False)
														
 
															-
														
 
															-            if "search.weixin.qq.com" not in [text['host'] for text in contents]:
														
 
															-                return "未找到search_key"
														
 
															-            else:
														
 
															-                for content in contents:
														
 
															-                    if content["host"] == "search.weixin.qq.com" and content[
														
 
															-                        "path"] == "/cgi-bin/wxaweb/wxindexgetusergroup":
														
 
															-                        print(f"content:{content}")
														
 
															-                        text = content['request']['body']['text']
														
 
															-                        search_key = json.loads(text)['search_key']
														
 
															-                        openid = json.loads(text)['openid']
														
 
															-                        return search_key, openid
														
 
															+    def test_time(cls):
															+        """Print two sample date strings.
															+
															+        First the fixed integer 20230207 rendered as ``YYYY-MM-DD``, then
															+        the date seven days before today rendered as ``YYYYMMDD``.
															+        """
															+        raw_day = str(20230207)
															+        dashed_day = f"{raw_day[:4]}-{raw_day[4:6]}-{raw_day[6:]}"
															+        print(dashed_day)
															+        week_ago = date.today() + timedelta(days=-7)
															+        print(week_ago.strftime("%Y%m%d"))
														
 
															+dict2 = {'id': 1,
														
 
															+         'word': '消息',
														
 
															+         'wechatScores': [{'score': 95521022, 'scoreDate': '2023-02-07'},
														
 
															+                          {'score': 97315283, 'scoreDate': '2023-02-08'},
														
 
															+                          {'score': 109845849, 'scoreDate': '2023-02-09'},
														
 
															+                          {'score': 107089560, 'scoreDate': '2023-02-10'},
														
 
															+                          {'score': 102658391, 'scoreDate': '2023-02-11'},
														
 
															+                          {'score': 93843701, 'scoreDate': '2023-02-12'},
														
 
															+                          {'score': 100211894, 'scoreDate': '2023-02-13'}]}
														
 
															+
														
 
															+response = {'code': -10002, 'content': {'resp_list': []}}
														
 
															+
														
 
															+dict3 = {'id':1, 'word': '出大', 'wechatScores': []}
														
 
															+
														
 
															 if __name__ == "__main__":
														
 
															-    print(Demo.demo1())
														
 
															+
														
 
															+    Demo.test_time()
														
 
															+
														
 
															+    pass
														
--- a/weixinzhishu/weixinzhishu_main/weixinzhishu.py
+++ b/weixinzhishu/weixinzhishu_main/weixinzhishu.py
@@ -1,12 +1,15 @@
 
															 # -*- coding: utf-8 -*-
														
 
															 # @Author: wangkun
														
 
															 # @Time: 2023/2/10
														
 
															+import os
														
 
															+import sys
														
 
															+import time
														
 
															 from datetime import date, timedelta
														
 
															-
														
 
															 import requests
														
 
															 import json
														
 
															-
														
 
															+sys.path.append(os.getcwd())
														
 
															 from common.feishu import Feishu
														
 
															+from common.common import Common
														
 
															 class Weixinzhishu:
														
@@ -16,26 +19,23 @@ class Weixinzhishu:
 
															     @classmethod
														
 
															     def wechat_key(cls, log_type, crawler):
															-        sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
															-        for i in range(len(sheet)):
															-            search_key = sheet[1][1]
															-            openid = sheet[1][2]
															-            return search_key, openid
															+        """Read the WeChat search_key and openid from Feishu sheet 'sVL74k'.
															+
															+        :param log_type: forwarded to Feishu.get_values_batch and Common.logger
															+        :param crawler: forwarded to Feishu.get_values_batch and Common.logger
															+        :return: (search_key, openid) taken from row 1, columns 1 and 2 of the
															+            sheet; None when the sheet cannot be read or lacks that row or
															+            those columns (the error is logged)
															+        """
															+        try:
															+            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
															+            # Only row 1 is ever read, so index it directly instead of
															+            # wrapping the lookup in a loop that returns on its first pass.
															+            credential_row = sheet[1]
															+            return credential_row[1], credential_row[2]
															+        except Exception as e:
															+            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
															+            return None
														
 
															     @classmethod
														
 
															-    def weixinzhishu(cls, log_type, crawler):
														
 
															-        search_word_list = cls.search_word()
														
 
															-        wechat_key = cls.wechat_key(log_type, crawler)
														
 
															-        search_key = wechat_key[0]
														
 
															-        openid = wechat_key[-1]
														
 
															-        start_ymd = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d").replace("-", "")
														
 
															-        end_ymd = (date.today() + timedelta(days=-8)).strftime("%Y-%m-%d").replace("-", "")
														
 
															-        print(f"search_key:{search_key}")
														
 
															-        print(f"openid:{openid}")
														
 
															-        print(f"start_ymd:{start_ymd}")
														
 
															-        print(f"start_ymd:{end_ymd}")
														
 
															-        for word in search_word_list:
														
 
															-            print(f"word:{word}")
														
 
															+    def weixinzhishu(cls, log_type, crawler, word_id, word):
														
 
															+        try:
														
 
															+            wechat_key = cls.wechat_key(log_type, crawler)
														
 
															+            search_key = wechat_key[0]
														
 
															+            openid = wechat_key[-1]
														
 
															+            end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
														
 
															+            start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
														
 
															             url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
														
 
															             payload = json.dumps({
														
 
															                 "openid": openid,
														
@@ -52,13 +52,89 @@ class Weixinzhishu:
 
															                 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
														
 
															             }
														
 
															             response = requests.request("POST", url, headers=headers, data=payload)
														
 
															-            if response.json()['code'] == -10000:
														
 
															-                print(response.text)
														
 
															+            if response.json()['code'] != 0 and response.json()['code'] != -10002:
														
 
															+                Common.logger(log_type, crawler).warning(f"response:{response.text}\n")
														
 
															+            elif response.json()['code'] == -10002:
														
 
															+                # 数据写入飞书
														
 
															+                now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
														
 
															+                values = [[now, word, "该词暂未收录"]]
														
 
															+                Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
														
 
															+                time.sleep(0.5)
														
 
															+                Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
														
 
															+                Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
														
 
															+
														
 
															+                word_wechat_score_dict = {
														
 
															+                    "id": word_id,
														
 
															+                    "word": word,
														
 
															+                    "wechatScores": [],
														
 
															+                }
														
 
															+                # print(word_wechat_score_dict)
														
 
															+                return word_wechat_score_dict
														
 
															             else:
														
 
															-                print(response.text)
														
 
															                 time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
														
 
															-                print(time_index)
														
 
															+                wechat_score_list = []
														
 
															+                for i in range(len(time_index)):
														
 
															+                    score_time = time_index[i]['time']
														
 
															+                    score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
														
 
															+                    score = time_index[i]['score']
														
 
															+                    wechat_score_dict = {"score": score, "scoreDate": score_time_str}
														
 
															+                    wechat_score_list.append(wechat_score_dict)
														
 
															+
														
 
															+                    # 数据写入飞书
														
 
															+                    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
														
 
															+                    values = [[now, word, score_time_str, score]]
														
 
															+                    Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
														
 
															+                    time.sleep(0.5)
														
 
															+                    Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
														
 
															+                    Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
														
 
															+
														
 
															+                word_wechat_score_dict = {
														
 
															+                    "id": word_id,
														
 
															+                    "word": word,
														
 
															+                    "wechatScores": wechat_score_list,
														
 
															+                }
														
 
															+                # print(word_wechat_score_dict)
														
 
															+                return word_wechat_score_dict
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type, crawler).error(f"weixinzhishu异常:{e}\n")
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_weixinzhishu(cls, log_type, crawler, max_words=100):
															+        """Collect WeChat-index scores for the search words listed in Feishu.
															+
															+        Words are read from sheets "nCudsM" and "D80uEf" of the
															+        'weixinzhishu_search_word' document, skipping empty cells, and are
															+        concatenated in that order. Each word is passed to cls.weixinzhishu
															+        together with a 1-based id.
															+
															+        :param log_type: forwarded to Feishu, Common.logger and cls.weixinzhishu
															+        :param crawler: forwarded to Common.logger and cls.weixinzhishu
															+        :param max_words: upper bound on how many words are queried; None
															+            queries all of them. Defaults to 100, the former fixed count.
															+        :return: {"data": [...]} with one entry per queried word. An entry is
															+            whatever cls.weixinzhishu returned, which is None when that call
															+            only logged a warning or an exception.
															+        """
															+        word_list = []
															+        for sheet_id in ("nCudsM", "D80uEf"):
															+            sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', sheet_id)
															+            word_list.extend(cell for row in sheet for cell in row if cell is not None)
															+
															+        # Slice rather than index over range(100): with fewer than 100 words
															+        # the old loop raised IndexError and lost every score collected so far.
															+        selected_words = word_list if max_words is None else word_list[:max_words]
															+
															+        word_score_list = []
															+        for word_id, word in enumerate(selected_words, start=1):
															+            word_score = cls.weixinzhishu(log_type, crawler, word_id, word)
															+            word_score_list.append(word_score)
															+            Common.logger(log_type, crawler).info(f'"{word}"微信指数：{word_score}\n')
															+
															+        return {"data": word_score_list}
														
 
															 if __name__ == "__main__":
														
 
															-    Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu')
														
 
															+    # word_dict = Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu', 1, "出大")
														
 
															+    # print(word_dict)
														
 
															+
														
 
															+    word_dict_demo = Weixinzhishu.get_weixinzhishu('weixin', 'weixinzhishu')
														
 
															+    print(word_dict_demo)
														
 
															+
														
 
															+    pass
														
--- a/youtube/youtube_follow/youtube_follow.py
+++ b/youtube/youtube_follow/youtube_follow.py
@@ -7,10 +7,12 @@ YouTube 定向榜
 
															     2. 10分钟>=时长>=1分钟
														
 
															 """
														
 
															 import os
														
 
															+import re
														
 
															 import shutil
														
 
															 import sys
														
 
															 import time
														
 
															 import json
														
 
															+# import emoji
														
 
															 import requests
														
 
															 from selenium import webdriver
														
 
															 from selenium.webdriver.chrome.service import Service
														
@@ -701,6 +703,15 @@ class Follow:
 
															         except Exception as e:
														
 
															             Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
														
 
															+    @classmethod
														
 
															+    def filter_emoji(cls, title):
															+        """Return ``title`` with every character above U+FFFF removed.
															+
															+        This strips emoji encoded in the supplementary planes (for example
															+        U+1F605). Symbols inside the Basic Multilingual Plane, such as
															+        U+2764, are kept, and any other supplementary-plane character is
															+        removed as well.
															+
															+        :param title: text to clean
															+        :return: the cleaned text
															+        """
															+        # Python 3 strings are sequences of full code points, so the old
															+        # surrogate-pair fallback for narrow builds could never be reached.
															+        return re.sub('[\U00010000-\U0010ffff]', '', title)
														
 
															+
														
 
															     @classmethod
														
 
															     def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
														
 
															         try:
														
@@ -891,15 +902,16 @@ class Follow:
 
															             else:
														
 
															                 playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
														
 
															                 videoDetails = response.json()['videoDetails']
														
 
															-                streamingData = response.json()['streamingData']
														
 
															+                # streamingData = response.json()['streamingData']
														
 
															                 # video_title
														
 
															                 if 'title' not in  videoDetails:
														
 
															                     video_title = ''
														
 
															                 else:
														
 
															                     video_title = videoDetails['title']
														
 
															-                if Translate.is_contains_chinese(video_title) is False:
														
 
															-                    video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
														
 
															+                video_title = cls.filter_emoji(video_title)
														
 
															+                # if Translate.is_contains_chinese(video_title) is False:
														
 
															+                video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
														
 
															                 if 'lengthSeconds' not in videoDetails:
														
 
															                     duration = 0
														
@@ -945,14 +957,15 @@ class Follow:
 
															                     cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
														
 
															                 # video_url
														
 
															-                if 'formats' not in streamingData:
														
 
															-                    video_url = ''
														
 
															-                elif len(streamingData['formats']) == 0:
														
 
															-                    video_url = ''
														
 
															-                elif 'url' not in streamingData['formats'][-1]:
														
 
															-                    video_url = ''
														
 
															-                else:
														
 
															-                    video_url = streamingData['formats'][-1]['url']
														
 
															+                # if 'formats' not in streamingData:
														
 
															+                #     video_url = ''
														
 
															+                # elif len(streamingData['formats']) == 0:
														
 
															+                #     video_url = ''
														
 
															+                # elif 'url' not in streamingData['formats'][-1]:
														
 
															+                #     video_url = ''
														
 
															+                # else:
														
 
															+                #     video_url = streamingData['formats'][-1]['url']
														
 
															+                video_url = f"https://www.youtube.com/watch?v={video_id}"
														
 
															                 Common.logger(log_type, crawler).info(f'video_title:{video_title}')
														
 
															                 Common.logger(log_type, crawler).info(f'video_id:{video_id}')
														
@@ -994,7 +1007,8 @@ class Follow:
 
															             else:
														
 
															                 # 下载视频
														
 
															                 Common.logger(log_type, crawler).info('开始下载视频...')
														
 
															-                Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
														
 
															+                # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
														
 
															+                Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
														
 
															                 ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
														
 
															                 video_width = int(ffmpeg_dict['width'])
														
 
															                 video_height = int(ffmpeg_dict['height'])
														
@@ -1015,12 +1029,12 @@ class Follow:
 
															                 video_dict['avatar_url'] = video_dict['cover_url']
														
 
															                 video_dict['session'] = f'youtube{int(time.time())}'
														
 
															                 rule='1,2'
														
 
															-                if duration < 60 or duration > 600:
														
 
															-                    # 删除视频文件夹
														
 
															-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
														
 
															-                    Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则，删除成功\n")
														
 
															-                    return
														
 
															-                elif video_size == 0 or duration == 0 or video_size is None or duration is None:
														
 
															+                # if duration < 60 or duration > 600:
														
 
															+                #     # 删除视频文件夹
														
 
															+                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
														
 
															+                #     Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则，删除成功\n")
														
 
															+                #     return
														
 
															+                if video_size == 0 or duration == 0 or video_size is None or duration is None:
														
 
															                     # 删除视频文件夹
														
 
															                     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
														
 
															                     Common.logger(log_type, crawler).info(f"视频下载出错，删除成功\n")
														
@@ -1120,9 +1134,10 @@ class Follow:
 
															 if __name__ == "__main__":
														
 
															-    print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
														
 
															+    # print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
														
 
															     # print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
														
 
															     # Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
														
 
															     # Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
														
 
															     # Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
														
 
															+    print(Follow.filter_emoji("姐妹倆一唱一和，完美配合，終於把大慶降服了😅😅#萌娃搞笑日常"))
														
 
															     pass