wangkun 2 سال پیش
والد
کامیت
5f9a8dbe5b

BIN
.DS_Store


+ 14 - 0
common/common.py

@@ -139,6 +139,20 @@ class Common:
             except Exception as e:
                 cls.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
 
+        elif text == "youtube_video":
+            # 需要下载的视频地址
+            video_url = url
+            # 视频名
+            video_name = "video.mp4"
+            try:
+                download_cmd = f"yt-dlp -f 'bv[height=720][ext=mp4]+ba[ext=m4a]' --merge-output-format mp4 {video_url} -o {video_name}"
+                os.system(download_cmd)
+                move_cmd = f"mv {video_name} {video_dir}"
+                os.system(move_cmd)
+                cls.logger(log_type, crawler).info("==========视频下载完成==========")
+            except Exception as e:
+                Common.logger(log_type, crawler).error(f"视频下载失败:{e}\n")
+
         # 下载音频
         elif text == "audio":
             # 需要下载的视频地址

+ 4 - 0
common/feishu.py

@@ -66,6 +66,8 @@ class Feishu:
     crawler_youtube = 'https://w42nne6hzg.feishu.cn/sheets/shtcnrLyr1zbYbhhZyqpN7Xrd5f?'
     # 微信指数
     weixinzhishu = 'https://w42nne6hzg.feishu.cn/sheets/shtcnqhMRUGunIfGnGXMOBYiy4K?'
+    # 微信指数_搜索词
+    weixinzhishu_search_word = 'https://w42nne6hzg.feishu.cn/sheets/shtcnHxCj6dZBYMuK1Q3tIJVlqg?'
 
     # 手机号
     wangkun = "13426262515"
@@ -127,6 +129,8 @@ class Feishu:
             return 'shtcnrLyr1zbYbhhZyqpN7Xrd5f'
         elif crawler == 'weixinzhishu':
             return 'shtcnqhMRUGunIfGnGXMOBYiy4K'
+        elif crawler == 'weixinzhishu_search_word':
+            return 'shtcnHxCj6dZBYMuK1Q3tIJVlqg'
 
     # 获取飞书api token
     @classmethod

+ 3 - 1
requirements.txt

@@ -5,5 +5,7 @@ oss2==2.15.0
 psutil==5.9.2
 PyMySQL==1.0.2
 requests==2.27.1
-selenium==4.8.0
+selenium~=4.2.0
 urllib3==1.26.9
+emoji~=2.2.0
+Appium-Python-Client~=2.8.1

BIN
weixinzhishu/.DS_Store


تفاوت فایلی نمایش داده نمی شود زیرا این فایل بسیار بزرگ است
+ 0 - 0
weixinzhishu/weixinzhishu_chlsfiles/charles202302131147.txt


+ 25 - 31
weixinzhishu/weixinzhishu_main/demo.py

@@ -3,41 +3,35 @@
 # @Time: 2023/2/13
 import json
 import os
+from datetime import date, timedelta
 
 
 class Demo:
     @classmethod
-    def demo1(cls):
-        # charles 抓包文件保存目录
-        chlsfile_path = f"../weixinzhishu_chlsfiles/"
-        if len(os.listdir(chlsfile_path)) == 0:
-            print("chlsfile文件夹为空")
-        else:
-            print(f"chlsfile_list:{sorted(os.listdir(chlsfile_path))}")
-            # 获取最新的 chlsfile
-            chlsfile = sorted(os.listdir(chlsfile_path))[-1]
-            # 分离文件名与扩展名
-            new_file = os.path.splitext(chlsfile)
-
-            # 重命名文件后缀
-            os.rename(os.path.join(chlsfile_path, chlsfile),
-                      os.path.join(chlsfile_path, new_file[0] + ".txt"))
-
-            with open(f"{chlsfile_path}{new_file[0]}.txt", encoding='utf-8-sig', errors='ignore') as f:
-                contents = json.load(f, strict=False)
-
-            if "search.weixin.qq.com" not in [text['host'] for text in contents]:
-                return "未找到search_key"
-            else:
-                for content in contents:
-                    if content["host"] == "search.weixin.qq.com" and content[
-                        "path"] == "/cgi-bin/wxaweb/wxindexgetusergroup":
-                        print(f"content:{content}")
-                        text = content['request']['body']['text']
-                        search_key = json.loads(text)['search_key']
-                        openid = json.loads(text)['openid']
-                        return search_key, openid
+    def test_time(cls):
+        """Print a fixed YYYYMMDD value as YYYY-MM-DD, then the date seven days ago as YYYYMMDD."""
+        # Slice the 8-digit date into year / month / day and join with dashes.
+        raw_date = str(20230207)
+        dashed_date = "-".join((raw_date[:4], raw_date[4:6], raw_date[6:]))
+        print(dashed_date)
+        # Same compact format the crawler uses for its query window.
+        week_ago = date.today() + timedelta(days=-7)
+        print(week_ago.strftime("%Y%m%d"))
 
 
+# Per-word result with scores: 'id', 'word', and a 'wechatScores' list holding
+# one {'score', 'scoreDate'} entry per day (seven consecutive days here).
+dict2 = {'id': 1,
+         'word': '消息',
+         'wechatScores': [{'score': 95521022, 'scoreDate': '2023-02-07'},
+                          {'score': 97315283, 'scoreDate': '2023-02-08'},
+                          {'score': 109845849, 'scoreDate': '2023-02-09'},
+                          {'score': 107089560, 'scoreDate': '2023-02-10'},
+                          {'score': 102658391, 'scoreDate': '2023-02-11'},
+                          {'score': 93843701, 'scoreDate': '2023-02-12'},
+                          {'score': 100211894, 'scoreDate': '2023-02-13'}]}
+
+# Response body carrying code -10002 and an empty 'resp_list'.
+response = {'code': -10002, 'content': {'resp_list': []}}
+
+# Per-word result with the same keys as dict2 but an empty 'wechatScores' list.
+dict3 = {'id':1, 'word': '出大', 'wechatScores': []}
+
 if __name__ == "__main__":
-    print(Demo.demo1())
+
+    Demo.test_time()
+
+    pass

+ 101 - 25
weixinzhishu/weixinzhishu_main/weixinzhishu.py

@@ -1,12 +1,15 @@
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Time: 2023/2/10
+import os
+import sys
+import time
 from datetime import date, timedelta
-
 import requests
 import json
-
+sys.path.append(os.getcwd())
 from common.feishu import Feishu
+from common.common import Common
 
 
 class Weixinzhishu:
@@ -16,26 +19,23 @@ class Weixinzhishu:
 
     @classmethod
     def wechat_key(cls, log_type, crawler):
-        sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
-        for i in range(len(sheet)):
-            search_key = sheet[1][1]
-            openid = sheet[1][2]
-            return search_key, openid
+        # Read search_key and openid from the 'sVL74k' Feishu sheet.
+        #
+        # Returns a (search_key, openid) tuple taken from the second row,
+        # columns 1 and 2. Returns None implicitly when the sheet is empty
+        # (the loop body never runs) or when any exception is caught and logged.
+        try:
+            sheet = Feishu.get_values_batch(log_type, crawler, 'sVL74k')
+            # The loop returns on its first iteration; it effectively acts as
+            # an "is the sheet non-empty" guard, and `i` is never used.
+            for i in range(len(sheet)):
+                search_key = sheet[1][1]
+                openid = sheet[1][2]
+                return search_key, openid
+        except Exception as e:
+            # Best effort: log and fall through, so callers receive None.
+            Common.logger(log_type, crawler).error(f"wechat_key:{e}\n")
 
     @classmethod
-    def weixinzhishu(cls, log_type, crawler):
-        search_word_list = cls.search_word()
-        wechat_key = cls.wechat_key(log_type, crawler)
-        search_key = wechat_key[0]
-        openid = wechat_key[-1]
-        start_ymd = (date.today() + timedelta(days=-1)).strftime("%Y-%m-%d").replace("-", "")
-        end_ymd = (date.today() + timedelta(days=-8)).strftime("%Y-%m-%d").replace("-", "")
-        print(f"search_key:{search_key}")
-        print(f"openid:{openid}")
-        print(f"start_ymd:{start_ymd}")
-        print(f"start_ymd:{end_ymd}")
-        for word in search_word_list:
-            print(f"word:{word}")
+    def weixinzhishu(cls, log_type, crawler, word_id, word):
+        try:
+            wechat_key = cls.wechat_key(log_type, crawler)
+            search_key = wechat_key[0]
+            openid = wechat_key[-1]
+            end_ymd = (date.today() + timedelta(days=0)).strftime("%Y%m%d")
+            start_ymd = (date.today() + timedelta(days=-7)).strftime("%Y%m%d")
             url = "https://search.weixin.qq.com/cgi-bin/wxaweb/wxindex"
             payload = json.dumps({
                 "openid": openid,
@@ -52,13 +52,89 @@ class Weixinzhishu:
                 'Referer': 'https://servicewechat.com/wxc026e7662ec26a3a/42/page-frame.html'
             }
             response = requests.request("POST", url, headers=headers, data=payload)
-            if response.json()['code'] == -10000:
-                print(response.text)
+            if response.json()['code'] != 0 and response.json()['code'] != -10002:
+                Common.logger(log_type, crawler).warning(f"response:{response.text}\n")
+            elif response.json()['code'] == -10002:
+                # 数据写入飞书
+                now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+                values = [[now, word, "该词暂未收录"]]
+                Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
+                time.sleep(0.5)
+                Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
+                Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
+
+                word_wechat_score_dict = {
+                    "id": word_id,
+                    "word": word,
+                    "wechatScores": [],
+                }
+                # print(word_wechat_score_dict)
+                return word_wechat_score_dict
             else:
-                print(response.text)
                 time_index = response.json()['content']['resp_list'][0]['indexes'][0]['time_indexes']
-                print(time_index)
+                wechat_score_list = []
+                for i in range(len(time_index)):
+                    score_time = time_index[i]['time']
+                    score_time_str = f"{str(score_time)[:4]}-{str(score_time)[4:6]}-{str(score_time)[6:]}"
+                    score = time_index[i]['score']
+                    wechat_score_dict = {"score": score, "scoreDate": score_time_str}
+                    wechat_score_list.append(wechat_score_dict)
+
+                    # 数据写入飞书
+                    now = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time())))
+                    values = [[now, word, score_time_str, score]]
+                    Feishu.insert_columns(log_type, crawler, "5011a2", "ROWS", 1, 2)
+                    time.sleep(0.5)
+                    Feishu.update_values(log_type, crawler, "5011a2", "F2:Z2", values)
+                    Common.logger(log_type, crawler).info(f'热词"{word}"微信指数数据写入飞书成功\n')
+
+                word_wechat_score_dict = {
+                    "id": word_id,
+                    "word": word,
+                    "wechatScores": wechat_score_list,
+                }
+                # print(word_wechat_score_dict)
+                return word_wechat_score_dict
+        except Exception as e:
+            Common.logger(log_type, crawler).error(f"weixinzhishu异常:{e}\n")
+
+    @classmethod
+    def get_weixinzhishu(cls, log_type, crawler):
+        """Collect WeChat-index scores for the search words listed in Feishu.
+
+        Reads the "nCudsM" and "D80uEf" tabs of the 'weixinzhishu_search_word'
+        spreadsheet, flattens their non-None cells into a single word list
+        ("nCudsM" words first), and queries cls.weixinzhishu for at most the
+        first 100 words. Word ids are the 1-based positions in that list.
+
+        :param log_type: forwarded to the logger and the Feishu helper
+        :param crawler: forwarded to the logger and cls.weixinzhishu
+        :return: {"data": [...]} with one cls.weixinzhishu result per word, in
+                 order; an entry is None when cls.weixinzhishu logged an
+                 exception instead of returning a result
+        """
+        our_word_sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', "nCudsM")
+        out_word_sheet = Feishu.get_values_batch(log_type, 'weixinzhishu_search_word', "D80uEf")
+        # Flatten both sheets row by row, dropping empty (None) cells.
+        word_list = [
+            cell
+            for sheet in (our_word_sheet, out_word_sheet)
+            for row in sheet
+            for cell in row
+            if cell is not None
+        ]
+        word_score_list = []
+        # Cap at 100 words. Slicing (rather than range(100)) avoids an
+        # IndexError when the sheets contain fewer than 100 words.
+        for word_id, word in enumerate(word_list[:100], start=1):
+            word_score = cls.weixinzhishu(log_type, crawler, word_id, word)
+            word_score_list.append(word_score)
+            Common.logger(log_type, crawler).info(f'"{word}"微信指数:{word_score}\n')
+
+        return {"data": word_score_list}
 
 
 if __name__ == "__main__":
-    Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu')
+    # word_dict = Weixinzhishu.weixinzhishu('weixin', 'weixinzhishu', 1, "出大")
+    # print(word_dict)
+
+    word_dict_demo = Weixinzhishu.get_weixinzhishu('weixin', 'weixinzhishu')
+    print(word_dict_demo)
+
+    pass

+ 34 - 19
youtube/youtube_follow/youtube_follow.py

@@ -7,10 +7,12 @@ YouTube 定向榜
     2. 10分钟>=时长>=1分钟
 """
 import os
+import re
 import shutil
 import sys
 import time
 import json
+# import emoji
 import requests
 from selenium import webdriver
 from selenium.webdriver.chrome.service import Service
@@ -701,6 +703,15 @@ class Follow:
         except Exception as e:
             Common.logger(log_type, crawler).error(f"get_videos异常:{e}\n")
 
+    @classmethod
+    def filter_emoji(cls, title):
+        """Return ``title`` with every code point above U+FFFF removed.
+
+        This strips emoji encoded outside the Basic Multilingual Plane, and
+        also any other character in the U+10000..U+10FFFF range. Characters
+        at or below U+FFFF are left untouched.
+
+        :param title: text to clean
+        :return: the cleaned text
+        """
+        try:
+            astral_pattern = re.compile(u'[\U00010000-\U0010ffff]')
+        except re.error:
+            # If the wide range cannot be compiled, match surrogate pairs instead.
+            astral_pattern = re.compile(u'[\uD800-\uDBFF][\uDC00-\uDFFF]')
+        cleaned_title = astral_pattern.sub("", title)
+        return cleaned_title
+
     @classmethod
     def get_video_info(cls, log_type, crawler, out_uid, video_id, machine):
         try:
@@ -891,15 +902,16 @@ class Follow:
             else:
                 playerMicroformatRenderer = response.json()['microformat']['playerMicroformatRenderer']
                 videoDetails = response.json()['videoDetails']
-                streamingData = response.json()['streamingData']
+                # streamingData = response.json()['streamingData']
 
                 # video_title
                 if 'title' not in  videoDetails:
                     video_title = ''
                 else:
                     video_title = videoDetails['title']
-                if Translate.is_contains_chinese(video_title) is False:
-                    video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
+                video_title = cls.filter_emoji(video_title)
+                # if Translate.is_contains_chinese(video_title) is False:
+                video_title = Translate.google_translate(video_title, machine)  # 自动翻译标题为中文
 
                 if 'lengthSeconds' not in videoDetails:
                     duration = 0
@@ -945,14 +957,15 @@ class Follow:
                     cover_url = videoDetails['thumbnail']['thumbnails'][-1]['url']
 
                 # video_url
-                if 'formats' not in streamingData:
-                    video_url = ''
-                elif len(streamingData['formats']) == 0:
-                    video_url = ''
-                elif 'url' not in streamingData['formats'][-1]:
-                    video_url = ''
-                else:
-                    video_url = streamingData['formats'][-1]['url']
+                # if 'formats' not in streamingData:
+                #     video_url = ''
+                # elif len(streamingData['formats']) == 0:
+                #     video_url = ''
+                # elif 'url' not in streamingData['formats'][-1]:
+                #     video_url = ''
+                # else:
+                #     video_url = streamingData['formats'][-1]['url']
+                video_url = f"https://www.youtube.com/watch?v={video_id}"
 
                 Common.logger(log_type, crawler).info(f'video_title:{video_title}')
                 Common.logger(log_type, crawler).info(f'video_id:{video_id}')
@@ -994,7 +1007,8 @@ class Follow:
             else:
                 # 下载视频
                 Common.logger(log_type, crawler).info('开始下载视频...')
-                Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
+                # Common.download_method(log_type, crawler, 'video', video_dict['video_title'], video_dict['video_url'])
+                Common.download_method(log_type, crawler, 'youtube_video', video_dict['video_title'], video_dict['video_url'])
                 ffmpeg_dict = Common.ffmpeg(log_type, crawler, f"./{crawler}/videos/{video_dict['video_title']}/video.mp4")
                 video_width = int(ffmpeg_dict['width'])
                 video_height = int(ffmpeg_dict['height'])
@@ -1015,12 +1029,12 @@ class Follow:
                 video_dict['avatar_url'] = video_dict['cover_url']
                 video_dict['session'] = f'youtube{int(time.time())}'
                 rule='1,2'
-                if duration < 60 or duration > 600:
-                    # 删除视频文件夹
-                    shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
-                    Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
-                    return
-                elif video_size == 0 or duration == 0 or video_size is None or duration is None:
+                # if duration < 60 or duration > 600:
+                #     # 删除视频文件夹
+                #     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
+                #     Common.logger(log_type, crawler).info(f"时长:{video_dict['duration']}不满足抓取规则,删除成功\n")
+                #     return
+                if video_size == 0 or duration == 0 or video_size is None or duration is None:
                     # 删除视频文件夹
                     shutil.rmtree(f"./{crawler}/videos/{video_dict['video_title']}/")
                     Common.logger(log_type, crawler).info(f"视频下载出错,删除成功\n")
@@ -1120,9 +1134,10 @@ class Follow:
 
 
 if __name__ == "__main__":
-    print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
+    # print(Follow.get_browse_id('follow', 'youtube', '@chinatravel5971', "local"))
     # print(Follow.get_user_from_feishu('follow', 'youtube', 'c467d7', 'dev', 'local'))
     # Follow.get_out_user_info('follow', 'youtube', 'UC08jgxf119fzynp2uHCvZIg', '@weitravel')
     # Follow.get_video_info('follow', 'youtube', 'OGVK0IXBIhI')
     # Follow.get_follow_videos('follow', 'youtube', 'youtube_follow', 'out', 'dev', 'local')
+    print(Follow.filter_emoji("姐妹倆一唱一和,完美配合,終於把大慶降服了😅😅#萌娃搞笑日常"))
     pass

برخی فایل ها در این مقایسه diff نمایش داده نمی شوند زیرا تعداد فایل ها بسیار زیاد است