3 anni fa · 2afc41db6d
--- a/.DS_Store
+++ b/.DS_Store
--- a/README.md
+++ b/README.md
@@ -19,11 +19,14 @@
 
															 #### 使用说明
														
 
															-1.  cd crawler_shipinhao
														
 
															-2.  python3 ./main/run_shipinhao_recommend.py
														
 
															+1.  cd ./crawler_shipinhao
														
 
															+2.  sh shipinhao.sh
														
 
															 #### 需求
														
 
															+2022/12/20
														
 
															+1. 新增定向脚本
														
 
															+
														
 
															 2022/10/27
														
 
															 1. 新增新视榜单爬虫
														
--- a/main/run_shipinhao.py
+++ b/main/run_shipinhao.py
@@ -1,32 +0,0 @@
 
															-# -*- coding: utf-8 -*-
														
 
															-# @Author: wangkun
														
 
															-# @Time: 2022/10/27
														
 
															-import datetime
														
 
															-import os
														
 
															-import sys
														
 
															-import time
														
 
															-sys.path.append(os.getcwd())
														
 
															-from main.common import Common
														
 
															-from shipinhao.shipinhao_recommend import Recommend
														
 
															-from xinshi.xinshi_pc import XinshiPC
														
 
															-
														
 
															-
														
 
															-class Main:
														
 
															-    @classmethod
														
 
															-    def main(cls, env):
														
 
															-        while True:
														
 
															-            if 22 >= datetime.datetime.now().hour >= 10:
														
 
															-                # Common.logger('xinshi').info('开始抓取"新视-PC"内容\n')
														
 
															-                # XinshiPC.login('xinshi', env)
														
 
															-                Recommend.run_recommend('recommend', env)
														
 
															-                Common.del_logs('recommend')
														
 
															-                Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)
														
 
															-                Recommend.download_cnt = []
														
 
															-                time.sleep(3600 * (24 - datetime.datetime.now().hour))
														
 
															-
														
 
															-            else:
														
 
															-                pass
														
 
															-
														
 
															-
														
 
															-if __name__ == '__main__':
														
 
															-    Main.main('prod')
														
--- a/main/run_shipinhao_follow.py
+++ b/main/run_shipinhao_follow.py
@@ -0,0 +1,24 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+# @Author: wangkun
														
 
															+# @Time: 2022/12/14
														
 
															+import os
														
 
															+import sys
														
 
															+import time
														
 
															+sys.path.append(os.getcwd())
														
 
															+from main.common import Common
														
 
															+from shipinhao.shipinhao_follow import Follow
														
 
															+
														
 
															+
														
 
															+class Main:
														
 
															+    @classmethod
														
 
															+    def main(cls, log_type, env):
														
 
															+        while True:
														
 
															+            Common.logger(log_type).info('开始抓取视频号定向榜单\n')
														
 
															+            Follow.search_to_all_user_homepage(log_type, env)
														
 
															+            Common.del_logs(log_type)
														
 
															+            Common.logger(log_type).info('休眠 10 分钟')
														
 
															+            time.sleep(60 * 10)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    Main.main('follow', 'dev')
														
--- a/shipinhao/run_shipinhao_recommend.py
+++ b/shipinhao/run_shipinhao_recommend.py
@@ -1,6 +1,6 @@
 
															 # -*- coding: utf-8 -*-
														
 
															 # @Author: wangkun
														
 
															-# @Time: 2022/8/31
														
 
															+# @Time: 2022/10/27
														
 
															 import datetime
														
 
															 import os
														
 
															 import sys
														
@@ -14,7 +14,7 @@ class Main:
 
															     @classmethod
														
 
															     def main(cls, env):
														
 
															         while True:
														
 
															-            if 16 >= datetime.datetime.now().hour >= 10:
														
 
															+            if 22 >= datetime.datetime.now().hour >= 10:
														
 
															                 Recommend.run_recommend('recommend', env)
														
 
															                 Common.del_logs('recommend')
														
 
															                 Common.logger('recommend').info('休眠{}小时\n', 24 - datetime.datetime.now().hour)
														
--- a/nohup.out
+++ b/nohup.out
--- a/shipinhao.sh
+++ b/shipinhao.sh
@@ -0,0 +1,19 @@
 
															+#!/bin/bash
														
 
															+echo "开始"
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在杀进程..."
														
 
															+# shellcheck disable=SC2009
														
 
															+# ps aux | grep run_shipinhao
														
 
															+ps aux | grep run_shipinhao_follow.py | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															+# shellcheck disable=SC2009
														
 
															+#ps aux | grep run_shipinhao_recommend.py | grep -v grep | awk '{print $2}' | xargs kill -9
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 进程已杀死！"
														
 
															+
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在更新代码..."
														
 
															+cd /Users/lieyunye/Desktop/crawler/crawler_shipinhao/ && git pull origin master --force && rm -f nohup.log
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 代码更新完成！"
														
 
															+
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 正在重启服务..."
														
 
															+#nohup python3 -u main/run_shipinhao_recommend.py >>./nohup.log 2>&1 &
														
 
															+nohup python3 -u main/run_shipinhao_follow.py >>./nohup.log 2>&1 &
														
 
															+echo "$(date "+%Y-%m-%d %H:%M:%S") 服务重启完毕!"
														
 
															+exit 0
														
--- a/shipinhao/shipinhao_follow.py
+++ b/shipinhao/shipinhao_follow.py
@@ -0,0 +1,427 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+# @Author: wangkun
														
 
															+# @Time: 2022/12/14
														
 
															+import os
														
 
															+import sys
														
 
															+import time
														
 
															+from appium import webdriver
														
 
															+from appium.webdriver.extensions.android.nativekey import AndroidKey
														
 
															+from selenium.common import NoSuchElementException
														
 
															+from appium.webdriver.webdriver import WebDriver
														
 
															+from selenium.webdriver.common.by import By
														
 
															+sys.path.append(os.getcwd())
														
 
															+from main.common import Common
														
 
															+from main.feishu_lib import Feishu
														
 
															+from shipinhao.shipinhao_publish import Publish
														
 
															+
														
 
															+
														
 
															+class Follow:
														
 
															+    # 过滤词库
														
 
															+    @classmethod
														
 
															+    def filter_words(cls, log_type):
														
 
															+        try:
														
 
															+            filter_words_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'gmeOgJ')
														
 
															+            filter_words_list = []
														
 
															+            for x in filter_words_sheet:
														
 
															+                for y in x:
														
 
															+                    if y is None:
														
 
															+                        pass
														
 
															+                    else:
														
 
															+                        filter_words_list.append(y)
														
 
															+            return filter_words_list
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).error('filter_words异常:{}\n', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_users_from_feishu(cls, log_type):
														
 
															+        try:
														
 
															+            users_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'yVFqxa')
														
 
															+            user_list = []
														
 
															+            for i in range(1, len(users_sheet)):
														
 
															+                user_name = users_sheet[i][1]
														
 
															+                if user_name is not None:
														
 
															+                    user_list.append(user_name)
														
 
															+            return user_list
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).error(f'get_users_from_feishu异常:{e}\n')
														
 
															+
														
 
															+    @classmethod
														
 
															+    def start_follow_wechat(cls, log_type, user_name, env):
														
 
															+        # try:
														
 
															+        Common.logger(log_type).info('启动微信')
														
 
															+        caps = {
														
 
															+            "platformName": "Android",  # 手机操作系统 Android / iOS
														
 
															+            "deviceName": "Android",  # 连接的设备名（模拟器或真机），安卓可以随便写
														
 
															+            "platforVersion": "11",  # 手机对应的系统版本（Android 11）
														
 
															+            "appPackage": "com.tencent.mm",  # 被测APP的包名，乐活圈 Android
														
 
															+            "appActivity": ".ui.LauncherUI",  # 启动的Activity名
														
 
															+            "autoGrantPermissions": "true",  # 让 appium 自动授权 base 权限，
														
 
															+            # 如果 noReset 为 True，则该条不生效（该参数为 Android 独有），对应的值为 True 或 False
														
 
															+            "unicodekeyboard": True,  # 使用自带输入法，输入中文时填True
														
 
															+            "resetkeyboard": True,  # 执行完程序恢复原来输入法
														
 
															+            "noReset": True,  # 不重置APP
														
 
															+            "printPageSourceOnFailure": True,  # 找不到元素时，appium log 会完整记录当前页面的 pagesource
														
 
															+            "newCommandTimeout": 6000,  # 初始等待时间
														
 
															+            "automationName": "UiAutomator2",  # 使用引擎，默认为 Appium，
														
 
															+            # 其中 Appium、UiAutomator2、Selendroid、Espresso 用于 Android，XCUITest 用于 iOS
														
 
															+            "showChromedriverLog": True,
														
 
															+            # "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
														
 
															+            "chromeOptions": {"androidProcess": "com.tencent.mm:tools"},
														
 
															+            'enableWebviewDetailsCollection': True,
														
 
															+            'setWebContentsDebuggingEnabled': True,
														
 
															+            'chromedriverExecutable': '/Users/wangkun/Downloads/chromedriver_v86/chromedriver',
														
 
															+            # 'chromedriverExecutable': '/Users/lieyunye/Downloads/chromedriver_v86/chromedriver',
														
 
															+        }
														
 
															+        driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
														
 
															+        driver.implicitly_wait(10)
														
 
															+
														
 
															+        cls.search_to_user_homepage(log_type, user_name, driver)
														
 
															+
														
 
															+        cls.search_user_videos(log_type, driver, env)
														
 
															+
														
 
															+        Common.logger(log_type).info('休眠 3s')
														
 
															+        time.sleep(3)
														
 
															+        cls.quit(log_type, driver)
														
 
															+
														
 
															+        # except Exception as e:
														
 
															+        #     Common.logger(log_type).error('start_follow_wechat异常:{}\n', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def quit(cls, log_type, driver: WebDriver):
														
 
															+        driver.quit()
														
 
															+        Common.logger(log_type).info('退出 APP 成功\n')
														
 
															+
														
 
															+    @classmethod
														
 
															+    def search_element(cls, log_type, driver: WebDriver, element):
														
 
															+        try:
														
 
															+            windowHandles = driver.window_handles
														
 
															+            # 遍历所有的handles，找到当前页面所在的handle：如果pageSource有包含你想要的元素，就是所要找的handle
														
 
															+            # 小程序的页面来回切换也需要：遍历所有的handles，切换到元素所在的handle
														
 
															+            for handle in windowHandles:
														
 
															+                driver.switch_to.window(handle)
														
 
															+                time.sleep(3)
														
 
															+                if len(driver.find_elements(By.XPATH, element)) != 0:
														
 
															+                    return driver.find_element(By.XPATH, element)
														
 
															+                else:
														
 
															+                    pass
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).warning('search_element异常:{}\n', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def search_to_user_homepage(cls, log_type, user_name, driver: WebDriver):
														
 
															+        Common.logger(log_type).info('点击搜索按钮')
														
 
															+        driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
														
 
															+
														
 
															+        Common.logger(log_type).info(f'输入搜索词:{user_name}')
														
 
															+        driver.find_element(By.ID, 'com.tencent.mm:id/cd7').send_keys(user_name)
														
 
															+        driver.press_keycode(AndroidKey.ENTER)
														
 
															+
														
 
															+        Common.logger(log_type).info('点击进入搜索结果页')
														
 
															+        driver.find_element(By.ID, 'com.tencent.mm:id/m94').click()
														
 
															+
														
 
															+        Common.logger(log_type).info('切换到webview')
														
 
															+        webview = driver.contexts
														
 
															+        driver.switch_to.context(webview[1])
														
 
															+
														
 
															+        time.sleep(3)
														
 
															+        Common.logger(log_type).info('点击"视频号"分类')
														
 
															+        cls.search_element(log_type, driver, '//div[@class="unit"]/*[2]').click()
														
 
															+
														
 
															+        time.sleep(3)
														
 
															+        Common.logger(log_type).info(f'进入用户主页:{user_name}')
														
 
															+        user_element = cls.search_element(log_type, driver, '//div[@class="video-account__container search_item_inner"]')
														
 
															+        if user_element is None:
														
 
															+            Common.logger(log_type).info(f'未搜索到用户:{user_name}\n')
														
 
															+            return
														
 
															+        else:
														
 
															+            user_element.click()
														
 
															+            time.sleep(1)
														
 
															+            Common.logger(log_type).info(f'进入 {user_name} 主页成功\n')
														
 
															+
														
 
															+    @classmethod
														
 
															+    def search_user_videos(cls, log_type, driver: WebDriver, env):
														
 
															+        Common.logger(log_type).info('切回NATIVE_APP')
														
 
															+        driver.switch_to.context('NATIVE_APP')
														
 
															+
														
 
															+        # 判断置顶视频
														
 
															+        top_videos = driver.find_elements(By.ID, 'com.tencent.mm:id/i56')
														
 
															+        Common.logger(log_type).info(f'发现置顶视频{len(top_videos)}个\n')
														
 
															+        if len(top_videos) == 0:
														
 
															+            return
														
 
															+        else:
														
 
															+            for i in range(len(top_videos)):
														
 
															+                top_videos[i].click()
														
 
															+                cls.download_publish(log_type, driver, env)
														
 
															+                driver.press_keycode(AndroidKey.BACK)
														
 
															+
														
 
															+        # 判断非置顶视频
														
 
															+        not_top_first_video = driver.find_elements(By.ID, 'com.tencent.mm:id/nmz')[len(top_videos)]
														
 
															+        not_top_first_video.click()
														
 
															+        while True:
														
 
															+            cls.download_publish(log_type, driver, env)
														
 
															+            driver.swipe(10, 1600, 10, 300, 200)
														
 
															+            if len(driver.find_elements(By.ID, 'com.tencent.mm:id/g2s')) > 0:
														
 
															+                Common.logger(log_type).info('到底啦 ~\n')
														
 
															+                return
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_video_info(cls, log_type, driver: WebDriver):
														
 
															+        driver.implicitly_wait(10)
														
 
															+        # 视频标题
														
 
															+        try:
														
 
															+            title_id = driver.find_element(By.ID, 'com.tencent.mm:id/ki5')
														
 
															+            video_title = title_id.get_attribute('name').split('\n')[0].strip()
														
 
															+        except NoSuchElementException:
														
 
															+            video_title = ''
														
 
															+
														
 
															+        # 点击播放器，获取视频时长
														
 
															+        # Common.logger(log_type).info('暂停播放')
														
 
															+        pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
														
 
															+        pause_btn.click()
														
 
															+        start_time = driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name')
														
 
															+        start_time = int(start_time.split(':')[0]) * 60 + int(start_time.split(':')[-1])
														
 
															+        try:
														
 
															+            end_time = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
														
 
															+        except NoSuchElementException:
														
 
															+            end_time = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
														
 
															+        end_time = int(end_time.split(':')[0]) * 60 + int(end_time.split(':')[-1])
														
 
															+        duration = start_time + end_time
														
 
															+
														
 
															+        # 点赞
														
 
															+        like_id = driver.find_element(By.ID, 'com.tencent.mm:id/k04')
														
 
															+        like_cnt = like_id.get_attribute('name')
														
 
															+        if like_cnt == "" or like_cnt == "喜欢":
														
 
															+            like_cnt = 0
														
 
															+        elif '万' in like_cnt:
														
 
															+            like_cnt = float(like_cnt.split('万')[0]) * 10000
														
 
															+        elif '万+' in like_cnt:
														
 
															+            like_cnt = float(like_cnt.split('万+')[0]) * 10000
														
 
															+        else:
														
 
															+            like_cnt = float(like_cnt)
														
 
															+
														
 
															+        # 分享
														
 
															+        share_id = driver.find_element(By.ID, 'com.tencent.mm:id/jhv')
														
 
															+        share_cnt = share_id.get_attribute('name')
														
 
															+        if share_cnt == "" or share_cnt == "转发":
														
 
															+            share_cnt = 0
														
 
															+        elif '万' in share_cnt:
														
 
															+            share_cnt = float(share_cnt.split('万')[0]) * 10000
														
 
															+        elif '万+' in share_cnt:
														
 
															+            share_cnt = float(share_cnt.split('万+')[0]) * 10000
														
 
															+        else:
														
 
															+            share_cnt = float(share_cnt)
														
 
															+
														
 
															+        # 收藏
														
 
															+        favorite_id = driver.find_element(By.ID, 'com.tencent.mm:id/fnp')
														
 
															+        favorite_cnt = favorite_id.get_attribute('name')
														
 
															+        if favorite_cnt == "" or favorite_cnt == "收藏":
														
 
															+            favorite_cnt = 0
														
 
															+        elif '万' in favorite_cnt:
														
 
															+            favorite_cnt = float(favorite_cnt.split('万')[0]) * 10000
														
 
															+        elif '万+' in favorite_cnt:
														
 
															+            favorite_cnt = float(favorite_cnt.split('万+')[0]) * 10000
														
 
															+        else:
														
 
															+            favorite_cnt = float(favorite_cnt)
														
 
															+
														
 
															+        # 评论
														
 
															+        comment_id = driver.find_element(By.ID, 'com.tencent.mm:id/bje')
														
 
															+        comment_cnt = comment_id.get_attribute('name')
														
 
															+        if comment_cnt == "" or comment_cnt == "评论":
														
 
															+            comment_cnt = 0
														
 
															+        elif '万' in comment_cnt:
														
 
															+            comment_cnt = float(comment_cnt.split('万')[0]) * 10000
														
 
															+        elif '万+' in comment_cnt:
														
 
															+            comment_cnt = float(comment_cnt.split('万+')[0]) * 10000
														
 
															+        else:
														
 
															+            comment_cnt = float(comment_cnt)
														
 
															+
														
 
															+        # 用户名
														
 
															+        username_id = driver.find_element(By.ID, 'com.tencent.mm:id/hft')
														
 
															+        user_name = username_id.get_attribute('name')
														
 
															+
														
 
															+        Common.logger(log_type).info('video_title:{}', video_title)
														
 
															+        Common.logger(log_type).info('duration:{}', duration)
														
 
															+        Common.logger(log_type).info('like_cnt:{}', like_cnt)
														
 
															+        Common.logger(log_type).info('share_cnt:{}', share_cnt)
														
 
															+        Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
														
 
															+        Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
														
 
															+        Common.logger(log_type).info('user_name:{}', user_name)
														
 
															+
														
 
															+        video_dict = {
														
 
															+            'video_title': video_title,
														
 
															+            'duration': duration,
														
 
															+            'like_cnt': like_cnt,
														
 
															+            'share_cnt': share_cnt,
														
 
															+            'share_id': share_id,
														
 
															+            'favorite_cnt': favorite_cnt,
														
 
															+            'comment_cnt': comment_cnt,
														
 
															+            'user_name': user_name
														
 
															+        }
														
 
															+        return video_dict
														
 
															+
														
 
															+    @classmethod
														
 
															+    def download_publish(cls, log_type, driver: WebDriver, env):
														
 
															+        video_dict = cls.get_video_info(log_type, driver)
														
 
															+        if int(video_dict['duration']) < 50:
														
 
															+            Common.logger(log_type).info(f'时长:{int(video_dict["duration"])} < 50 秒\n')
														
 
															+        elif video_dict['video_title'] == '':
														
 
															+            Common.logger(log_type).info('视频标题为空\n')
														
 
															+        # 过滤词库(视频标题)
														
 
															+        elif any(word if word in video_dict['video_title'] else False for word in cls.filter_words(log_type)) is True:
														
 
															+            Common.logger(log_type).info(f'视频已中过滤词:{video_dict["video_title"]}\n')
														
 
															+        # 视频号推荐_已下载表
														
 
															+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'c77cf9') for x in y]:
														
 
															+            Common.logger(log_type).info('视频已下载\n')
														
 
															+        # 视频号定向_已下载表
														
 
															+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'XxmRlE') for x in y]:
														
 
															+            Common.logger(log_type).info('视频已下载\n')
														
 
															+        # feeds 表去重
														
 
															+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'FSDlBy') for x in y]:
														
 
															+            Common.logger(log_type).info('视频已存在\n')
														
 
															+        # feeds 表去重
														
 
															+        elif str(video_dict['video_title']) in [x for y in Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ') for x in y]:
														
 
															+            Common.logger(log_type).info('视频已存在\n')
														
 
															+        # 分享给 windows 爬虫机
														
 
															+        else:
														
 
															+            video_dict['share_id'].click()
														
 
															+            driver.find_element(By.XPATH, '//*[@text="转发给朋友"]').click()
														
 
															+            driver.find_element(By.XPATH, '//*[@text="爬虫群"]').click()
														
 
															+            driver.find_element(By.ID, 'com.tencent.mm:id/guw').click()
														
 
															+
														
 
															+            # 把视频信息写入飞书feeds文档
														
 
															+            Feishu.insert_columns(log_type, 'shipinhao', 'qzDljJ', 'ROWS', 1, 2)
														
 
															+            get_feeds_time = int(time.time())
														
 
															+            values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
														
 
															+                       '定向榜',
														
 
															+                       str(video_dict['video_title']),
														
 
															+                       int(video_dict['duration']),
														
 
															+                       int(video_dict['like_cnt']),
														
 
															+                       int(video_dict['share_cnt']),
														
 
															+                       int(video_dict['favorite_cnt']),
														
 
															+                       int(video_dict['comment_cnt']),
														
 
															+                       str(video_dict['user_name'])]]
														
 
															+            time.sleep(1)
														
 
															+            Feishu.update_values(log_type, 'shipinhao', 'qzDljJ', 'A2:Z2', values)
														
 
															+            Common.logger(log_type).info('视频信息写入飞书文档成功\n')
														
 
															+
														
 
															+            while True:
														
 
															+                if Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')[1][11] is None:
														
 
															+                    Common.logger(log_type).info('等待更新 URL 信息')
														
 
															+                    time.sleep(10)
														
 
															+                else:
														
 
															+                    Common.logger(log_type).info('URL 信息已更新\n')
														
 
															+                    break
														
 
															+
														
 
															+            cls.publish(log_type, env)
														
 
															+
														
 
															+    # 下载 、上传
														
 
															+    @classmethod
														
 
															+    def publish(cls, log_type, env):
														
 
															+        try:
														
 
															+            follow_feeds_sheet = Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')
														
 
															+            for i in range(1, len(follow_feeds_sheet)):
														
 
															+                download_title = follow_feeds_sheet[i][2].strip().replace('"', '') \
														
 
															+                    .replace('“', '').replace('“', '…').replace("\n", "") \
														
 
															+                    .replace("/", "").replace("\r", "") \
														
 
															+                    .replace(".", "。").replace("\\", "").replace("&NBSP", "") \
														
 
															+                    .replace(":", "").replace("*", "").replace("？", "") \
														
 
															+                    .replace("?", "").replace('"', "").replace("<", "") \
														
 
															+                    .replace(">", "").replace("|", "").replace(" ", "")
														
 
															+                download_duration = follow_feeds_sheet[i][3]
														
 
															+                download_like_cnt = follow_feeds_sheet[i][4]
														
 
															+                download_share_cnt = follow_feeds_sheet[i][5]
														
 
															+                download_favorite_cnt = follow_feeds_sheet[i][6]
														
 
															+                download_comment_cnt = follow_feeds_sheet[i][7]
														
 
															+                download_username = follow_feeds_sheet[i][8]
														
 
															+                download_head_url = follow_feeds_sheet[i][9]
														
 
															+                download_cover_url = follow_feeds_sheet[i][10]
														
 
															+                download_video_url = follow_feeds_sheet[i][11]
														
 
															+
														
 
															+                Common.logger(log_type).info("download_title:{}", download_title)
														
 
															+                Common.logger(log_type).info("download_username:{}", download_username)
														
 
															+                Common.logger(log_type).info("download_video_url:{}", download_video_url)
														
 
															+
														
 
															+                if download_title is None or download_duration is None or download_video_url is None:
														
 
															+                    Feishu.dimension_range(log_type, 'shipinhao', 'qzDljJ', 'ROWS', i + 1, i + 1)
														
 
															+                    Common.logger(log_type).info('空行，删除成功\n')
														
 
															+                    return
														
 
															+                else:
														
 
															+                    # 下载封面
														
 
															+                    Common.download_method(log_type=log_type, text="cover",
														
 
															+                                           d_name=str(download_title), d_url=str(download_cover_url))
														
 
															+                    # 下载视频
														
 
															+                    Common.download_method(log_type=log_type, text="video",
														
 
															+                                           d_name=str(download_title), d_url=str(download_video_url))
														
 
															+                    # 保存视频信息至 "./videos/{download_video_title}/info.txt"
														
 
															+                    with open("./videos/" + download_title
														
 
															+                              + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
														
 
															+                        f_a.write('shipinhao' + str(int(time.time())) + "\n" +
														
 
															+                                  str(download_title) + "\n" +
														
 
															+                                  str(download_duration) + "\n" +
														
 
															+                                  str(download_favorite_cnt) + "\n" +
														
 
															+                                  str(download_comment_cnt) + "\n" +
														
 
															+                                  str(download_like_cnt) + "\n" +
														
 
															+                                  str(download_share_cnt) + "\n" +
														
 
															+                                  str(1920 * 1080) + "\n" +
														
 
															+                                  str(int(time.time())) + "\n" +
														
 
															+                                  str(download_username) + "\n" +
														
 
															+                                  str(download_head_url) + "\n" +
														
 
															+                                  str(download_video_url) + "\n" +
														
 
															+                                  str(download_cover_url) + "\n" +
														
 
															+                                  "SHIPINHAO" + str(int(time.time())))
														
 
															+                    Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
														
 
															+
														
 
															+                    Common.logger(log_type).info("开始上传视频:{}".format(download_title))
														
 
															+                    our_video_id = Publish.upload_and_publish(log_type, env, "follow")
														
 
															+                    if env == 'dev':
														
 
															+                        our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
														
 
															+                            our_video_id) + "/info"
														
 
															+                    else:
														
 
															+                        our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
														
 
															+                            our_video_id) + "/info"
														
 
															+                    Common.logger(log_type).info("视频上传完成:{}", our_video_link)
														
 
															+
														
 
															+                    # 视频ID工作表，插入首行
														
 
															+                    Feishu.insert_columns(log_type, "shipinhao", "XxmRlE", "ROWS", 1, 2)
														
 
															+                    # 视频ID工作表，首行写入数据
														
 
															+                    upload_time = int(time.time())
														
 
															+                    values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
														
 
															+                               "定向榜",
														
 
															+                               str(download_title),
														
 
															+                               our_video_link,
														
 
															+                               download_duration,
														
 
															+                               download_like_cnt,
														
 
															+                               download_share_cnt,
														
 
															+                               download_favorite_cnt,
														
 
															+                               download_comment_cnt,
														
 
															+                               download_username,
														
 
															+                               str(download_head_url),
														
 
															+                               str(download_cover_url),
														
 
															+                               str(download_video_url)]]
														
 
															+                    time.sleep(1)
														
 
															+                    Feishu.update_values(log_type, "shipinhao", "XxmRlE", "F2:V2", values)
														
 
															+
														
 
															+                    # 删除行或列，可选 ROWS、COLUMNS
														
 
															+                    time.sleep(1)
														
 
															+                    Feishu.dimension_range(log_type, "shipinhao", "qzDljJ", "ROWS", i + 1, i + 1)
														
 
															+                    Common.logger(log_type).info("下载/上传成功\n")
														
 
															+                    return
														
 
															+        except Exception as e:
														
 
															+            Feishu.dimension_range(log_type, "shipinhao", "qzDljJ", "ROWS", 2, 2)
														
 
															+            Common.logger(log_type).error('download_publish异常，删除视频信息成功:{}\n', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def search_to_all_user_homepage(cls, log_type, env):
														
 
															+        user_list = cls.get_users_from_feishu(log_type)
														
 
															+        for user in user_list:
														
 
															+            cls.start_follow_wechat(log_type, user, env)
														
 
															+        Common.logger(log_type).info('所有用户已抓取完毕\n')
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    # print(Follow.get_users_from_feishu('follow'))
														
 
															+    # print(len(Follow.get_users_from_feishu('follow')))
														
 
															+    print(Follow.filter_words('follow'))
														
 
															+    # Follow.search_to_all_user_homepage('follow')
														
 
															+    pass
														
 
															+
														
--- a/shipinhao/shipinhao_publish.py
+++ b/shipinhao/shipinhao_publish.py
@@ -143,9 +143,9 @@ class Publish:
 
															     video_file = 'video'
														
 
															     image_file = 'image'
														
 
															     info_file = 'info'
														
 
															-    uids_dev_up = [6267140]
														
 
															+    uids_dev_follow = [6267140]
														
 
															     uids_dev_play = [6267141]
														
 
															-    uids_prod_up = [20631278, 20631279]
														
 
															+    uids_prod_follow = [26117633, 26117634, 26117635, 26117636, 26117637, 26117638, 26117639, 26117640, 26117641, 26117642]
														
 
															     uids_prod_play = [20631278, 20631279]
														
 
															     @classmethod
														
@@ -176,12 +176,12 @@ class Publish:
 
															                     now_timestamp = int(round(time.time() * 1000))
														
 
															                     data['crawlerTaskTimestamp'] = str(now_timestamp)
														
 
															                     global uid
														
 
															-                    if env == "dev" and job == "up":
														
 
															-                        uid = str(random.choice(cls.uids_dev_up))
														
 
															+                    if env == "dev" and job == "follow":
														
 
															+                        uid = str(random.choice(cls.uids_dev_follow))
														
 
															                     elif env == "dev" and job == "play":
														
 
															                         uid = str(random.choice(cls.uids_dev_play))
														
 
															-                    elif env == "prod" and job == "up":
														
 
															-                        uid = str(random.choice(cls.uids_prod_up))
														
 
															+                    elif env == "prod" and job == "follow":
														
 
															+                        uid = str(random.choice(cls.uids_prod_follow))
														
 
															                     elif env == "prod" and job == "play":
														
 
															                         uid = str(random.choice(cls.uids_prod_play))
														
 
															                     data['loginUid'] = uid
														
--- a/shipinhao/shipinhao_recommend.py
+++ b/shipinhao/shipinhao_recommend.py
@@ -29,13 +29,13 @@ class Recommend:
 
															                     cls.download_cnt = []
														
 
															                     return
														
 
															                 else:
														
 
															-                    cls.start_wechat(log_type, env)
														
 
															+                    cls.start_recommend_wechat(log_type, env)
														
 
															         except Exception as e:
														
 
															             Common.logger(log_type).error('run_recommend异常:{}\n', e)
														
 
															     # 启动微信，并打开视频号
														
 
															     @classmethod
														
 
															-    def start_wechat(cls, log_type, env):
														
 
															+    def start_recommend_wechat(cls, log_type, env):
														
 
															         try:
														
 
															             Common.logger(log_type).info('启动微信')
														
 
															             caps = {
														
--- a/shipinhao/windows_follow.py
+++ b/shipinhao/windows_follow.py
@@ -0,0 +1,168 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+# @Author: wangkun
														
 
															+# @Time: 2022/12/21
														
 
															+import json
														
 
															+import os
														
 
															+import sys
														
 
															+import time
														
 
															+import psutil as psutil
														
 
															+from appium import webdriver
														
 
															+from selenium.webdriver.common.by import By
														
 
															+sys.path.append(os.getcwd())
														
 
															+from main.feishu_lib import Feishu
														
 
															+from main.common import Common
														
 
															+
														
 
															+
														
 
															+class ShipinhaoWindows:
														
 
															+    @classmethod
														
 
															+    def kill_pid(cls, log_type):
														
 
															+        try:
														
 
															+            os.system('chcp 65001')  # 将cmd的显示字符编码从默认的GBK改为UTF-8
														
 
															+            list_process = list()
														
 
															+            pid_list = psutil.pids()
														
 
															+            for sub_pid in pid_list:
														
 
															+                try:
														
 
															+                    process_info = psutil.Process(sub_pid)
														
 
															+                    if process_info.name() == 'WechatBrowser.exe' or process_info.name() == 'WeChatPlayer.exe':
														
 
															+                        list_process.append(sub_pid)
														
 
															+                except (psutil.NoSuchProcess, psutil.AccessDenied, psutil.ZombieProcess):
														
 
															+                    pass
														
 
															+            for pid in list_process:
														
 
															+                os.system('taskkill /f /pid ' + str(pid))
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).error('kill_pid异常:{}', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def click_video(cls, log_type):
														
 
															+        try:
														
 
															+            Common.logger(log_type).info('启动"微信"')
														
 
															+            desired_caps = {'app': r"C:\Program Files (x86)\Tencent\WeChat\WeChat.exe"}
														
 
															+            driver = webdriver.Remote(
														
 
															+                command_executor='http://127.0.0.1:4723',
														
 
															+                desired_capabilities=desired_caps)
														
 
															+            driver.implicitly_wait(10)
														
 
															+
														
 
															+            # Common.logger(log_type).info('点击"聊天窗口"')
														
 
															+            # driver.find_element(By.NAME, '聊天').click()
														
 
															+            #
														
 
															+            # Common.logger(log_type).info('点击"爬虫群"')
														
 
															+            # driver.find_elements(By.NAME, '爬虫群')[0].click()
														
 
															+
														
 
															+            Common.logger(log_type).info('点击视频')
														
 
															+            driver.find_elements(By.NAME, '消息')[-1].click()
														
 
															+
														
 
															+            Common.logger(log_type).info('休眠 10 秒，退出视频号')
														
 
															+            time.sleep(10)
														
 
															+            cls.kill_pid(log_type)
														
 
															+
														
 
															+            Common.logger(log_type).info('退出微信')
														
 
															+            driver.quit()
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).error('click_video异常:{}', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def get_url(cls, log_type):
														
 
															+        try:
														
 
															+            # charles 抓包文件保存目录
														
 
															+            charles_file_dir = r"./chlsfiles/"
														
 
															+
														
 
															+            if len(os.listdir(charles_file_dir)) == 0:
														
 
															+                Common.logger(log_type).info("未找到chlsfile文件，等待2s")
														
 
															+                time.sleep(2)
														
 
															+            else:
														
 
															+                # 目标文件夹下所有文件
														
 
															+                all_file = sorted(os.listdir(charles_file_dir))
														
 
															+
														
 
															+                # 获取到目标文件
														
 
															+                old_file = all_file[-1]
														
 
															+
														
 
															+                # 分离文件名与扩展名
														
 
															+                new_file = os.path.splitext(old_file)
														
 
															+
														
 
															+                # 重命名文件后缀
														
 
															+                os.rename(os.path.join(charles_file_dir, old_file),
														
 
															+                          os.path.join(charles_file_dir, new_file[0] + ".txt"))
														
 
															+
														
 
															+                with open(charles_file_dir + new_file[0] + ".txt", encoding='utf-8-sig', errors='ignore') as f:
														
 
															+                    contents = json.load(f, strict=False)
														
 
															+
														
 
															+                video_url_list = []
														
 
															+                cover_url_list = []
														
 
															+
														
 
															+                if "finder.video.qq.com" in [text['host'] for text in contents]:
														
 
															+                    for text in contents:
														
 
															+                        if text["host"] == "finder.video.qq.com" and text["path"] == "/251/20302/stodownload":
														
 
															+                            video_url_list.append(text)
														
 
															+                        elif text["host"] == "finder.video.qq.com" and text["path"] == "/251/20304/stodownload":
														
 
															+                            cover_url_list.append(text)
														
 
															+
														
 
															+                    video_url = video_url_list[0]['host']+video_url_list[0]['path']+'?'+video_url_list[0]['query']
														
 
															+                    cover_url = cover_url_list[0]['host']+cover_url_list[0]['path']+'?'+cover_url_list[0]['query']
														
 
															+                    head_url = cover_url
														
 
															+
														
 
															+                    # print(f'video_url:{video_url}')
														
 
															+                    # print(f'cover_url:{cover_url}')
														
 
															+                    # print(f'head_url:{head_url}')
														
 
															+
														
 
															+                    return video_url, cover_url, head_url
														
 
															+                else:
														
 
															+                    Common.logger(log_type).info("未找到url")
														
 
															+                    return '未找到url'
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).exception("get_url异常：{}\n", e)
														
 
															+            return None
														
 
															+
														
 
															+    @classmethod
														
 
															+    def write_url(cls, log_type):
														
 
															+        try:
														
 
															+            while True:
														
 
															+                if Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')[1][11] is None:
														
 
															+                    Common.del_charles_files('follow')
														
 
															+                    cls.click_video(log_type)
														
 
															+                    Common.logger(log_type).info('等待 2s')
														
 
															+                    time.sleep(2)
														
 
															+                    Common.logger(log_type).info('获取视频头像/封面/播放地址')
														
 
															+                    urls = cls.get_url(log_type)
														
 
															+                    if urls == '未找到url':
														
 
															+                        time.sleep(1)
														
 
															+                        cls.write_url(log_type)
														
 
															+                    elif urls is None:
														
 
															+                        time.sleep(1)
														
 
															+                        cls.write_url(log_type)
														
 
															+                    else:
														
 
															+                        Feishu.update_values(log_type, 'shipinhao', 'qzDljJ', 'J2:L2',
														
 
															+                                             [['https://'+urls[2], 'https://'+urls[1], 'https://'+urls[0]]])
														
 
															+                        Common.logger(log_type).info('视频地址信息写入飞书成功\n')
														
 
															+                        Common.del_charles_files('follow')
														
 
															+                        break
														
 
															+                else:
														
 
															+                    Common.logger(log_type).info('视频已有地址信息，休眠 10s')
														
 
															+                    time.sleep(10)
														
 
															+                    break
														
 
															+        except Exception as e:
														
 
															+            # Feishu.dimension_range(log_type, 'shipinhao', 'qzDljJ', 'ROWS', 2, 2)
														
 
															+            Common.logger(log_type).error('write_url异常:{}\n', e)
														
 
															+
														
 
															+    @classmethod
														
 
															+    def run_get_url(cls, log_type):
														
 
															+        try:
														
 
															+            while True:
														
 
															+                if len(Feishu.get_values_batch(log_type, 'shipinhao', 'qzDljJ')) == 1:
														
 
															+                    Common.logger(log_type).info('暂无需要获取地址的视频信息')
														
 
															+                    time.sleep(30)
														
 
															+                    break
														
 
															+                else:
														
 
															+                    cls.write_url(log_type)
														
 
															+
														
 
															+        except Exception as e:
														
 
															+            Common.logger(log_type).error('run_get_url异常:{}\n', e)
														
 
															+
														
 
															+
														
 
															+if __name__ == '__main__':
														
 
															+    while True:
														
 
															+        ShipinhaoWindows.run_get_url('follow')
														
 
															+        Common.del_logs('follow')
														
 
															+        time.sleep(1)
														
 
															+
														
 
															+    pass
														
--- a/shipinhao/windows_recommend.py
+++ b/shipinhao/windows_recommend.py