wangkun 2 سال پیش
والد
کامیت
2afc41db6d

BIN
.DS_Store


+ 5 - 2
README.md

@@ -19,11 +19,14 @@
 
 
 
 
 #### 使用说明
 #### 使用说明
-1.  cd crawler_shipinhao
-2.  python3 ./main/run_shipinhao_recommend.py
+1.  cd ./crawler_shipinhao
+2.  sh shipinhao.sh
 
 
 
 
 #### 需求
 #### 需求
+2022/12/20
+1. 新增定向脚本
+
 2022/10/27
 2022/10/27
 1. 新增新视榜单爬虫
 1. 新增新视榜单爬虫
 
 

+ 0 - 32
main/run_shipinhao.py

@@ -1,32 +0,0 @@
-# -*- coding: utf-8 -*-
-# @Author: wangkun
-# @Time: 2022/10/27
-import datetime
-import os
-import sys
-import time
-sys.path.append(os.getcwd())
-from main.common import Common
-from shipinhao.shipinhao_recommend import Recommend
-from xinshi.xinshi_pc import XinshiPC
-
-
-class Main:
-    @classmethod
-    def main(cls, env):
-        while True:
-            if 22 >= datetime.datetime.now().hour >= 10:
-                # Common.logger('xinshi').info('开始抓取"新视-PC"内容\n')
-                # XinshiPC.login('xinshi', env)
-                Recommend.run_recommend('recommend', env)
-                Common.del_logs('recommend')
-                Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)
-                Recommend.download_cnt = []
-                time.sleep(3600 * (24 - datetime.datetime.now().hour))
-
-            else:
-                pass
-
-
-if __name__ == '__main__':
-    Main.main('prod')

+ 24 - 0
main/run_shipinhao_follow.py

@@ -0,0 +1,24 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/12/14
+import os
+import sys
+import time
+sys.path.append(os.getcwd())
+from main.common import Common
+from shipinhao.shipinhao_follow import Follow
+
+
+class Main:
+    @classmethod
+    def main(cls, log_type, env):
+        while True:
+            Common.logger(log_type).info('开始抓取视频号定向榜单\n')
+            Follow.search_to_all_user_homepage(log_type, env)
+            Common.del_logs(log_type)
+            Common.logger(log_type).info('休眠 10 分钟')
+            time.sleep(60 * 10)
+
+
+if __name__ == '__main__':
+    # Start the endless follow crawl with log type 'follow'.
+    # NOTE(review): the environment is hard-coded to 'dev'; confirm this is intended
+    # for the machine that launches this script.
+    Main.main('follow', 'dev')

+ 2 - 2
shipinhao/run_shipinhao_recommend.py → main/run_shipinhao_recommend.py

@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 # -*- coding: utf-8 -*-
 # @Author: wangkun
 # @Author: wangkun
-# @Time: 2022/8/31
+# @Time: 2022/10/27
 import datetime
 import datetime
 import os
 import os
 import sys
 import sys
@@ -14,7 +14,7 @@ class Main:
     @classmethod
     @classmethod
     def main(cls, env):
     def main(cls, env):
         while True:
         while True:
-            if 16 >= datetime.datetime.now().hour >= 10:
+            if 22 >= datetime.datetime.now().hour >= 10:
                 Recommend.run_recommend('recommend', env)
                 Recommend.run_recommend('recommend', env)
                 Common.del_logs('recommend')
                 Common.del_logs('recommend')
                 Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)
                 Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)

+ 0 - 0
nohup.out


+ 19 - 0
shipinhao.sh

@@ -0,0 +1,19 @@
+#!/bin/bash
+echo "开始"
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在杀进程..."
+# shellcheck disable=SC2009
+# ps aux | grep run_shipinhao
+ps aux | grep run_shipinhao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9
+# shellcheck disable=SC2009
+#ps aux | grep run_shipinhao_recommend.py | grep -v grep | awk '{print $2}' | xargs kill -9
+echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死!"
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
+cd /Users/lieyunye/Desktop/crawler/crawler_shipinhao/ && git pull origin master --force && rm -f nohup.log
+echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成!"
+
+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
+#nohup python3 -u main/run_shipinhao_recommend.py >>./nohup.log 2>&1 &
+nohup python3 -u main/run_shipinhao_follow.py >>./nohup.log 2>&1 &
+echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"
+exit 0

+ 427 - 0
shipinhao/shipinhao_follow.py

@@ -0,0 +1,427 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/12/14
+import os
+import sys
+import time
+from appium import webdriver
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from selenium.common import NoSuchElementException
+from appium.webdriver.webdriver import WebDriver
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from main.common import Common
+from main.feishu_lib import Feishu
+from shipinhao.shipinhao_publish import Publish
+
+
+class Follow:
+    # 过滤词库
+    @classmethod
+    def filter_words(cls, log_type):
+        try:
+            filter_words_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'gmeOgJ')
+            filter_words_list = []
+            for x in filter_words_sheet:
+                for y in x:
+                    if y is None:
+                        pass
+                    else:
+                        filter_words_list.append(y)
+            return filter_words_list
+        except Exception as e:
+            Common.logger(log_type).error('filter_words异常:{}\n', e)
+
+    @classmethod
+    def get_users_from_feishu(cls, log_type):
+        try:
+            users_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'yVFqxa')
+            user_list = []
+            for i in range(1, len(users_sheet)):
+                user_name = users_sheet[i][1]
+                if user_name is not None:
+                    user_list.append(user_name)
+            return user_list
+        except Exception as e:
+            Common.logger(log_type).error(f'get_users_from_feishu异常:{e}\n')
+
+    @classmethod
+    def start_follow_wechat(cls, log_type, user_name, env):
+        # try:
+        Common.logger(log_type).info('启动微信')
+        caps = {
+            "platformName": "Android",  # 手机操作系统 Android / iOS
+            "deviceName": "Android",  # 连接的设备名(模拟器或真机),安卓可以随便写
+            "platforVersion": "11",  # 手机对应的系统版本(Android 11)
+            "appPackage": "com.tencent.mm",  # 被测APP的包名,乐活圈 Android
+            "appActivity": ".ui.LauncherUI",  # 启动的Activity名
+            "autoGrantPermissions": "true",  # 让 appium 自动授权 base 权限,
+            # 如果 noReset 为 True,则该条不生效(该参数为 Android 独有),对应的值为 True 或 False
+            "unicodekeyboard": True,  # 使用自带输入法,输入中文时填True
+            "resetkeyboard": True,  # 执行完程序恢复原来输入法
+            "noReset": True,  # 不重置APP
+            "printPageSourceOnFailure": True,  # 找不到元素时,appium log 会完整记录当前页面的 pagesource
+            "newCommandTimeout": 6000,  # 初始等待时间
+            "automationName": "UiAutomator2",  # 使用引擎,默认为 Appium,
+            # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android,XCUITest 用于 iOS
+            "showChromedriverLog": True,
+            # "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
+            "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
+            'enableWebviewDetailsCollection': True,
+            'setWebContentsDebuggingEnabled': True,
+            'chromedriverExecutable': '/Users/wangkun/Downloads/chromedriver_v86/chromedriver',
+            # 'chromedriverExecutable': '/Users/lieyunye/Downloads/chromedriver_v86/chromedriver',
+        }
+        driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        driver.implicitly_wait(10)
+
+        cls.search_to_user_homepage(log_type, user_name, driver)
+
+        cls.search_user_videos(log_type, driver, env)
+
+        Common.logger(log_type).info('休眠 3s')
+        time.sleep(3)
+        cls.quit(log_type, driver)
+
+        # except Exception as e:
+        #     Common.logger(log_type).error('start_follow_wechat异常:{}\n', e)
+
+    @classmethod
+    def quit(cls, log_type, driver: WebDriver):
+        """Close the given driver session and log that the app was exited.
+
+        :param log_type: used to select the logger
+        :param driver: Appium driver whose session is ended with driver.quit()
+        """
+        driver.quit()
+        Common.logger(log_type).info('退出 APP 成功\n')
+
+    @classmethod
+    def search_element(cls, log_type, driver: WebDriver, element):
+        try:
+            windowHandles = driver.window_handles
+            # 遍历所有的handles,找到当前页面所在的handle:如果pageSource有包含你想要的元素,就是所要找的handle
+            # 小程序的页面来回切换也需要:遍历所有的handles,切换到元素所在的handle
+            for handle in windowHandles:
+                driver.switch_to.window(handle)
+                time.sleep(3)
+                if len(driver.find_elements(By.XPATH, element)) != 0:
+                    return driver.find_element(By.XPATH, element)
+                else:
+                    pass
+        except Exception as e:
+            Common.logger(log_type).warning('search_element异常:{}\n', e)
+
+    @classmethod
+    def search_to_user_homepage(cls, log_type, user_name, driver: WebDriver):
+        Common.logger(log_type).info('点击搜索按钮')
+        driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
+
+        Common.logger(log_type).info(f'输入搜索词:{user_name}')
+        driver.find_element(By.ID, 'com.tencent.mm:id/cd7').send_keys(user_name)
+        driver.press_keycode(AndroidKey.ENTER)
+
+        Common.logger(log_type).info('点击进入搜索结果页')
+        driver.find_element(By.ID, 'com.tencent.mm:id/m94').click()
+
+        Common.logger(log_type).info('切换到webview')
+        webview = driver.contexts
+        driver.switch_to.context(webview[1])
+
+        time.sleep(3)
+        Common.logger(log_type).info('点击"视频号"分类')
+        cls.search_element(log_type, driver, '//div[@class="unit"]/*[2]').click()
+
+        time.sleep(3)
+        Common.logger(log_type).info(f'进入用户主页:{user_name}')
+        user_element = cls.search_element(log_type, driver, '//div[@class="video-account__container search_item_inner"]')
+        if user_element is None:
+            Common.logger(log_type).info(f'未搜索到用户:{user_name}\n')
+            return
+        else:
+            user_element.click()
+            time.sleep(1)
+            Common.logger(log_type).info(f'进入 {user_name} 主页成功\n')
+
+    @classmethod
+    def search_user_videos(cls, log_type, driver: WebDriver, env):
+        Common.logger(log_type).info('切回NATIVE_APP')
+        driver.switch_to.context('NATIVE_APP')
+
+        # 判断置顶视频
+        top_videos = driver.find_elements(By.ID, 'com.tencent.mm:id/i56')
+        Common.logger(log_type).info(f'发现置顶视频{len(top_videos)}个\n')
+        if len(top_videos) == 0:
+            return
+        else:
+            for i in range(len(top_videos)):
+                top_videos[i].click()
+                cls.download_publish(log_type, driver, env)
+                driver.press_keycode(AndroidKey.BACK)
+
+        # 判断非置顶视频
+        not_top_first_video = driver.find_elements(By.ID, 'com.tencent.mm:id/nmz')[len(top_videos)]
+        not_top_first_video.click()
+        while True:
+            cls.download_publish(log_type, driver, env)
+            driver.swipe(10, 1600, 10, 300, 200)
+            if len(driver.find_elements(By.ID, 'com.tencent.mm:id/g2s')) > 0:
+                Common.logger(log_type).info('到底啦 ~\n')
+                return
+
+    @classmethod
+    def get_video_info(cls, log_type, driver: WebDriver):
+        driver.implicitly_wait(10)
+        # 视频标题
+        try:
+            title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
+            video_title = title_id.get_attribute('name').split('\n')[0].strip()
+        except NoSuchElementException:
+            video_title = ''
+
+        # 点击播放器,获取视频时长
+        # Common.logger(log_type).info('暂停播放')
+        pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
+        pause_btn.click()
+        start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
+        start_time = int(start_time.split(':')[0]) * 60 + int(start_time.split(':')[-1])
+        try:
+            end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
+        except NoSuchElementException:
+            end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
+        end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
+        duration = start_time + end_time
+
+        # 点赞
+        like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
+        like_cnt = like_id.get_attribute('name')
+        if like_cnt == "" or like_cnt == "喜欢":
+            like_cnt = 0
+        elif '万' in like_cnt:
+            like_cnt = float(like_cnt.split('万')[0]) * 10000
+        elif '万+' in like_cnt:
+            like_cnt = float(like_cnt.split('万+')[0]) * 10000
+        else:
+            like_cnt = float(like_cnt)
+
+        # 分享
+        share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
+        share_cnt = share_id.get_attribute('name')
+        if share_cnt == "" or share_cnt == "转发":
+            share_cnt = 0
+        elif '万' in share_cnt:
+            share_cnt = float(share_cnt.split('万')[0]) * 10000
+        elif '万+' in share_cnt:
+            share_cnt = float(share_cnt.split('万+')[0]) * 10000
+        else:
+            share_cnt = float(share_cnt)
+
+        # 收藏
+        favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
+        favorite_cnt = favorite_id.get_attribute('name')
+        if favorite_cnt == "" or favorite_cnt == "收藏":
+            favorite_cnt = 0
+        elif '万' in favorite_cnt:
+            favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
+        elif '万+' in favorite_cnt:
+            favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
+        else:
+            favorite_cnt = float(favorite_cnt)
+
+        # 评论
+        comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
+        comment_cnt = comment_id.get_attribute('name')
+        if comment_cnt == "" or comment_cnt == "评论":
+            comment_cnt = 0
+        elif '万' in comment_cnt:
+            comment_cnt = float(comment_cnt.split('万')[0]) * 10000
+        elif '万+' in comment_cnt:
+            comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
+        else:
+            comment_cnt = float(comment_cnt)
+
+        # 用户名
+        username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
+        user_name = username_id.get_attribute('name')
+
+        Common.logger(log_type).info('video_title:{}', video_title)
+        Common.logger(log_type).info('duration:{}', duration)
+        Common.logger(log_type).info('like_cnt:{}', like_cnt)
+        Common.logger(log_type).info('share_cnt:{}', share_cnt)
+        Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
+        Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
+        Common.logger(log_type).info('user_name:{}', user_name)
+
+        video_dict = {
+            'video_title': video_title,
+            'duration': duration,
+            'like_cnt': like_cnt,
+            'share_cnt': share_cnt,
+            'share_id': share_id,
+            'favorite_cnt': favorite_cnt,
+            'comment_cnt': comment_cnt,
+            'user_name': user_name
+        }
+        return video_dict
+
+    @classmethod
+    def download_publish(cls, log_type, driver: WebDriver, env):
+        video_dict = cls.get_video_info(log_type, driver)
+        if int(video_dict['duration']) < 50:
+            Common.logger(log_type).info(f'时长:{int(video_dict["duration"])} < 50 秒\n')
+        elif video_dict['video_title'] == '':
+            Common.logger(log_type).info('视频标题为空\n')
+        # 过滤词库(视频标题)
+        elif any(word if word in video_dict['video_title'] else False for word in cls.filter_words(log_type)) is True:
+            Common.logger(log_type).info(f'视频已中过滤词:{video_dict["video_title"]}\n')
+        # 视频号推荐_已下载表
+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in y]:
+            Common.logger(log_type).info('视频已下载\n')
+        # 视频号定向_已下载表
+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'XxmRlE') for x in y]:
+            Common.logger(log_type).info('视频已下载\n')
+        # feeds 表去重
+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
+            Common.logger(log_type).info('视频已存在\n')
+        # feeds 表去重
+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ') for x in y]:
+            Common.logger(log_type).info('视频已存在\n')
+        # 分享给 windows 爬虫机
+        else:
+            video_dict['share_id'].click()
+            driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
+            driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
+            driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
+
+            # 把视频信息写入飞书feeds文档
+            Feishu.insert_columns(log_type, 'shipinhao', 'qzDljJ', 'ROWS', 1, 2)
+            get_feeds_time = int(time.time())
+            values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
+                       '定向榜',
+                       str(video_dict['video_title']),
+                       int(video_dict['duration']),
+                       int(video_dict['like_cnt']),
+                       int(video_dict['share_cnt']),
+                       int(video_dict['favorite_cnt']),
+                       int(video_dict['comment_cnt']),
+                       str(video_dict['user_name'])]]
+            time.sleep(1)
+            Feishu.update_values(log_type, 'shipinhao', 'qzDljJ', 'A2:Z2', values)
+            Common.logger(log_type).info('视频信息写入飞书文档成功\n')
+
+            while True:
+                if Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')[1][11] is None:
+                    Common.logger(log_type).info('等待更新 URL 信息')
+                    time.sleep(10)
+                else:
+                    Common.logger(log_type).info('URL 信息已更新\n')
+                    break
+
+            cls.publish(log_type, env)
+
+    # 下载 、上传
+    @classmethod
+    def publish(cls, log_type, env):
+        try:
+            follow_feeds_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')
+            for i in range(1, len(follow_feeds_sheet)):
+                download_title = follow_feeds_sheet[i][2].strip().replace('"', '') \
+                    .replace('“', '').replace('“', '…').replace("\n", "") \
+                    .replace("/", "").replace("\r", "") \
+                    .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
+                    .replace(":", "").replace("*", "").replace("?", "") \
+                    .replace("?", "").replace('"', "").replace("<", "") \
+                    .replace(">", "").replace("|", "").replace(" ", "")
+                download_duration = follow_feeds_sheet[i][3]
+                download_like_cnt = follow_feeds_sheet[i][4]
+                download_share_cnt = follow_feeds_sheet[i][5]
+                download_favorite_cnt = follow_feeds_sheet[i][6]
+                download_comment_cnt = follow_feeds_sheet[i][7]
+                download_username = follow_feeds_sheet[i][8]
+                download_head_url = follow_feeds_sheet[i][9]
+                download_cover_url = follow_feeds_sheet[i][10]
+                download_video_url = follow_feeds_sheet[i][11]
+
+                Common.logger(log_type).info("download_title:{}", download_title)
+                Common.logger(log_type).info("download_username:{}", download_username)
+                Common.logger(log_type).info("download_video_url:{}", download_video_url)
+
+                if download_title is None or download_duration is None or download_video_url is None:
+                    Feishu.dimension_range(log_type, 'shipinhao', 'qzDljJ', 'ROWS', i + 1, i + 1)
+                    Common.logger(log_type).info('空行,删除成功\n')
+                    return
+                else:
+                    # 下载封面
+                    Common.download_method(log_type=log_type, text="cover",
+                                           d_name=str(download_title), d_url=str(download_cover_url))
+                    # 下载视频
+                    Common.download_method(log_type=log_type, text="video",
+                                           d_name=str(download_title), d_url=str(download_video_url))
+                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                    with open("./videos/" + download_title
+                              + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
+                        f_a.write('shipinhao' + str(int(time.time())) + "\n" +
+                                  str(download_title) + "\n" +
+                                  str(download_duration) + "\n" +
+                                  str(download_favorite_cnt) + "\n" +
+                                  str(download_comment_cnt) + "\n" +
+                                  str(download_like_cnt) + "\n" +
+                                  str(download_share_cnt) + "\n" +
+                                  str(1920 * 1080) + "\n" +
+                                  str(int(time.time())) + "\n" +
+                                  str(download_username) + "\n" +
+                                  str(download_head_url) + "\n" +
+                                  str(download_video_url) + "\n" +
+                                  str(download_cover_url) + "\n" +
+                                  "SHIPINHAO" + str(int(time.time())))
+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
+
+                    Common.logger(log_type).info("开始上传视频:{}".format(download_title))
+                    our_video_id = Publish.upload_and_publish(log_type, env, "follow")
+                    if env == 'dev':
+                        our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
+                            our_video_id) + "/info"
+                    else:
+                        our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
+                            our_video_id) + "/info"
+                    Common.logger(log_type).info("视频上传完成:{}", our_video_link)
+
+                    # 视频ID工作表,插入首行
+                    Feishu.insert_columns(log_type, "shipinhao", "XxmRlE", "ROWS", 1, 2)
+                    # 视频ID工作表,首行写入数据
+                    upload_time = int(time.time())
+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                               "定向榜",
+                               str(download_title),
+                               our_video_link,
+                               download_duration,
+                               download_like_cnt,
+                               download_share_cnt,
+                               download_favorite_cnt,
+                               download_comment_cnt,
+                               download_username,
+                               str(download_head_url),
+                               str(download_cover_url),
+                               str(download_video_url)]]
+                    time.sleep(1)
+                    Feishu.update_values(log_type, "shipinhao", "XxmRlE", "F2:V2", values)
+
+                    # 删除行或列,可选 ROWS、COLUMNS
+                    time.sleep(1)
+                    Feishu.dimension_range(log_type, "shipinhao", "qzDljJ", "ROWS", i + 1, i + 1)
+                    Common.logger(log_type).info("下载/上传成功\n")
+                    return
+        except Exception as e:
+            Feishu.dimension_range(log_type, "shipinhao", "qzDljJ", "ROWS", 2, 2)
+            Common.logger(log_type).error('download_publish异常,删除视频信息成功:{}\n', e)
+
+    @classmethod
+    def search_to_all_user_homepage(cls, log_type, env):
+        user_list = cls.get_users_from_feishu(log_type)
+        for user in user_list:
+            cls.start_follow_wechat(log_type, user, env)
+        Common.logger(log_type).info('所有用户已抓取完毕\n')
+
+
+if __name__ == '__main__':
+    # Manual smoke test: print the filter-word list fetched from Feishu.
+    # The commented-out lines are alternative manual checks.
+    # print(Follow.get_users_from_feishu('follow'))
+    # print(len(Follow.get_users_from_feishu('follow')))
+    print(Follow.filter_words('follow'))
+    # Follow.search_to_all_user_homepage('follow')
+    pass
+

+ 6 - 6
shipinhao/shipinhao_publish.py

@@ -143,9 +143,9 @@ class Publish:
     video_file = 'video'
     video_file = 'video'
     image_file = 'image'
     image_file = 'image'
     info_file = 'info'
     info_file = 'info'
-    uids_dev_up = [6267140]
+    uids_dev_follow = [6267140]
     uids_dev_play = [6267141]
     uids_dev_play = [6267141]
-    uids_prod_up = [20631278, 20631279]
+    uids_prod_follow = [26117633, 26117634, 26117635, 26117636, 26117637, 26117638, 26117639, 26117640, 26117641, 26117642]
     uids_prod_play = [20631278, 20631279]
     uids_prod_play = [20631278, 20631279]
 
 
     @classmethod
     @classmethod
@@ -176,12 +176,12 @@ class Publish:
                     now_timestamp = int(round(time.time() * 1000))
                     now_timestamp = int(round(time.time() * 1000))
                     data['crawlerTaskTimestamp'] = str(now_timestamp)
                     data['crawlerTaskTimestamp'] = str(now_timestamp)
                     global uid
                     global uid
-                    if env == "dev" and job == "up":
-                        uid = str(random.choice(cls.uids_dev_up))
+                    if env == "dev" and job == "follow":
+                        uid = str(random.choice(cls.uids_dev_follow))
                     elif env == "dev" and job == "play":
                     elif env == "dev" and job == "play":
                         uid = str(random.choice(cls.uids_dev_play))
                         uid = str(random.choice(cls.uids_dev_play))
-                    elif env == "prod" and job == "up":
-                        uid = str(random.choice(cls.uids_prod_up))
+                    elif env == "prod" and job == "follow":
+                        uid = str(random.choice(cls.uids_prod_follow))
                     elif env == "prod" and job == "play":
                     elif env == "prod" and job == "play":
                         uid = str(random.choice(cls.uids_prod_play))
                         uid = str(random.choice(cls.uids_prod_play))
                     data['loginUid'] = uid
                     data['loginUid'] = uid

+ 2 - 2
shipinhao/shipinhao_recommend.py

@@ -29,13 +29,13 @@ class Recommend:
                     cls.download_cnt = []
                     cls.download_cnt = []
                     return
                     return
                 else:
                 else:
-                    cls.start_wechat(log_type, env)
+                    cls.start_recommend_wechat(log_type, env)
         except Exception as e:
         except Exception as e:
             Common.logger(log_type).error('run_recommend异常:{}\n', e)
             Common.logger(log_type).error('run_recommend异常:{}\n', e)
 
 
     # 启动微信,并打开视频号
     # 启动微信,并打开视频号
     @classmethod
     @classmethod
-    def start_wechat(cls, log_type, env):
+    def start_recommend_wechat(cls, log_type, env):
         try:
         try:
             Common.logger(log_type).info('启动微信')
             Common.logger(log_type).info('启动微信')
             caps = {
             caps = {

+ 168 - 0
shipinhao/windows_follow.py

@@ -0,0 +1,168 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2022/12/21
+import json
+import os
+import sys
+import time
+import psutil as psutil
+from appium import webdriver
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from main.feishu_lib import Feishu
+from main.common import Common
+
+
+class ShipinhaoWindows:
+    @classmethod
+    def kill_pid(cls, log_type):
+        try:
+            os.system('chcp 65001')  # 将cmd的显示字符编码从默认的GBK改为UTF-8
+            list_process = list()
+            pid_list = psutil.pids()
+            for sub_pid in pid_list:
+                try:
+                    process_info = psutil.Process(sub_pid)
+                    if process_info.name() == 'WechatBrowser.exe' or process_info.name() == 'WeChatPlayer.exe':
+                        list_process.append(sub_pid)
+                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
+                    pass
+            for pid in list_process:
+                os.system('taskkill /f /pid ' + str(pid))
+        except Exception as e:
+            Common.logger(log_type).error('kill_pid异常:{}', e)
+
+    @classmethod
+    def click_video(cls, log_type):
+        try:
+            Common.logger(log_type).info('启动"微信"')
+            desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
+            driver = webdriver.Remote(
+                command_executor='http://127.0.0.1:4723',
+                desired_capabilities=desired_caps)
+            driver.implicitly_wait(10)
+
+            # Common.logger(log_type).info('点击"聊天窗口"')
+            # driver.find_element(By.NAME, '聊天').click()
+            #
+            # Common.logger(log_type).info('点击"爬虫群"')
+            # driver.find_elements(By.NAME, '爬虫群')[0].click()
+
+            Common.logger(log_type).info('点击视频')
+            driver.find_elements(By.NAME, '消息')[-1].click()
+
+            Common.logger(log_type).info('休眠 10 秒,退出视频号')
+            time.sleep(10)
+            cls.kill_pid(log_type)
+
+            Common.logger(log_type).info('退出微信')
+            driver.quit()
+        except Exception as e:
+            Common.logger(log_type).error('click_video异常:{}', e)
+
+    @classmethod
+    def get_url(cls, log_type):
+        try:
+            # charles 抓包文件保存目录
+            charles_file_dir = r"./chlsfiles/"
+
+            if len(os.listdir(charles_file_dir)) == 0:
+                Common.logger(log_type).info("未找到chlsfile文件,等待2s")
+                time.sleep(2)
+            else:
+                # 目标文件夹下所有文件
+                all_file = sorted(os.listdir(charles_file_dir))
+
+                # 获取到目标文件
+                old_file = all_file[-1]
+
+                # 分离文件名与扩展名
+                new_file = os.path.splitext(old_file)
+
+                # 重命名文件后缀
+                os.rename(os.path.join(charles_file_dir, old_file),
+                          os.path.join(charles_file_dir, new_file[0] + ".txt"))
+
+                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
+                    contents = json.load(f, strict=False)
+
+                video_url_list = []
+                cover_url_list = []
+
+                if "finder.video.qq.com" in [text['host'] for text in contents]:
+                    for text in contents:
+                        if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
+                            video_url_list.append(text)
+                        elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
+                            cover_url_list.append(text)
+
+                    video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
+                    cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
+                    head_url = cover_url
+
+                    # print(f'video_url:{video_url}')
+                    # print(f'cover_url:{cover_url}')
+                    # print(f'head_url:{head_url}')
+
+                    return video_url, cover_url, head_url
+                else:
+                    Common.logger(log_type).info("未找到url")
+                    return '未找到url'
+
+        except Exception as e:
+            Common.logger(log_type).exception("get_url异常:{}\n", e)
+            return None
+
+    @classmethod
+    def write_url(cls, log_type):
+        try:
+            while True:
+                if Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')[1][11] is None:
+                    Common.del_charles_files('follow')
+                    cls.click_video(log_type)
+                    Common.logger(log_type).info('等待 2s')
+                    time.sleep(2)
+                    Common.logger(log_type).info('获取视频头像/封面/播放地址')
+                    urls = cls.get_url(log_type)
+                    if urls == '未找到url':
+                        time.sleep(1)
+                        cls.write_url(log_type)
+                    elif urls is None:
+                        time.sleep(1)
+                        cls.write_url(log_type)
+                    else:
+                        Feishu.update_values(log_type, 'shipinhao', 'qzDljJ', 'J2:L2',
+                                             [['https://'+urls[2], 'https://'+urls[1], 'https://'+urls[0]]])
+                        Common.logger(log_type).info('视频地址信息写入飞书成功\n')
+                        Common.del_charles_files('follow')
+                        break
+                else:
+                    Common.logger(log_type).info('视频已有地址信息,休眠 10s')
+                    time.sleep(10)
+                    break
+        except Exception as e:
+            # Feishu.dimension_range(log_type, 'shipinhao', 'qzDljJ', 'ROWS', 2, 2)
+            Common.logger(log_type).error('write_url异常:{}\n', e)
+
+    @classmethod
+    def run_get_url(cls, log_type):
+        try:
+            while True:
+                if len(Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')) == 1:
+                    Common.logger(log_type).info('暂无需要获取地址的视频信息')
+                    time.sleep(30)
+                    break
+                else:
+                    cls.write_url(log_type)
+
+        except Exception as e:
+            Common.logger(log_type).error('run_get_url异常:{}\n', e)
+
+
+if __name__ == '__main__':
+    # Poll forever: resolve pending URLs, prune the 'follow' logs, pause one second.
+    while True:
+        ShipinhaoWindows.run_get_url('follow')
+        Common.del_logs('follow')
+        time.sleep(1)
+
+    # Unreachable: the loop above never exits.
+    pass

+ 0 - 0
shipinhao/shipinhao_windows.py → shipinhao/windows_recommend.py