wangkun 2 lat temu
rodzic
commit
3487d6009d

BIN
.DS_Store


+ 1 - 0
README.MD

@@ -223,4 +223,5 @@ ps aux | grep run_zhiqingtiantiankan | grep -v grep | awk '{print $2}' | xargs k
 ps aux | grep search_key_mac | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep gongzhonghao | grep -v grep | awk '{print $2}' | xargs kill -9
 ps aux | grep xiaoniangao | grep -v grep | awk '{print $2}' | xargs kill -9
+ps aux | grep Appium.app | grep -v grep | awk '{print $2}' | xargs kill -9
 ```

+ 7 - 1
common/public.py

@@ -49,6 +49,7 @@ def get_config_from_mysql(log_type, source, env, text, action=''):
     title_list = []
     filter_list = []
     emoji_list = []
+    search_word_list = []
     for content in contents:
         config = content['config']
         config_dict = eval(config)
@@ -65,13 +66,18 @@ def get_config_from_mysql(log_type, source, env, text, action=''):
                 emoji_list_config = v.split(",")
                 for emoji in emoji_list_config:
                     emoji_list.append(emoji)
+            if k == "search_word":
+                search_word_list_config = v.split(",")
+                for search_word in search_word_list_config:
+                    search_word_list.append(search_word)
     if text == "title":
         return title_list
     elif text == "filter":
         return filter_list
     elif text == "emoji":
         return emoji_list
-
+    elif text == "search_word":
+        return search_word_list
 
 def random_title(log_type, crawler, env, text):
     random_title_list = get_config_from_mysql(log_type, crawler, env, text)

+ 1 - 1
kuaishou/kuaishou_recommend/kuaishou_recommend_shceduling.py

@@ -191,7 +191,7 @@ class KuaiShouRecommendScheduling:
             headers = {
                 'Accept-Language': 'zh-CN,zh;q=0.9',
                 'Connection': 'keep-alive',
-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_aba004b1780f4d7174d0a2ff42da1f{r}7; kpn=KUAISHOU_VISION;'.format(
+                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_7cdc486ebd1aba220455a7781d6ae5b5{r}7; kpn=KUAISHOU_VISION;'.format(
                     r=r),
                 'Origin': 'https://www.kuaishou.com',
                 'Referer': 'https://www.kuaishou.com/new-reco',

+ 1 - 1
kuaishou/kuaishou_recommend/recommend_kuaishou.py

@@ -189,7 +189,7 @@ class KuaiShouRecommend:
             headers = {
                 'Accept-Language': 'zh-CN,zh;q=0.9',
                 'Connection': 'keep-alive',
-                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_aba004b1780f4d7174d0a2ff42da1f{r}7; kpn=KUAISHOU_VISION;'.format(
+                'Cookie': 'kpf=PC_WEB; clientid=3; did=web_7cdc486ebd1aba220455a7781d6ae5b5{r}7; kpn=KUAISHOU_VISION;'.format(
                     r=r),
                 'Origin': 'https://www.kuaishou.com',
                 'Referer': 'https://www.kuaishou.com/new-reco',

BIN
shipinhao/.DS_Store


+ 3 - 0
shipinhao/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/25

BIN
shipinhao/shipinhao_search/.DS_Store


+ 3 - 0
shipinhao/shipinhao_search/__init__.py

@@ -0,0 +1,3 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/25

+ 245 - 0
shipinhao/shipinhao_search/shipinhao_search.py

@@ -0,0 +1,245 @@
+# -*- coding: utf-8 -*-
+# @Author: wangkun
+# @Time: 2023/4/25
+import difflib
+import os
+import sys
+import time
+from hashlib import md5
+from appium import webdriver
+from appium.webdriver.extensions.android.nativekey import AndroidKey
+from appium.webdriver.webdriver import WebDriver
+from selenium.common import NoSuchElementException
+from selenium.webdriver.common.by import By
+sys.path.append(os.getcwd())
+from common.feishu import Feishu
+from common.publish import Publish
+from common.common import Common
+from common.public import get_config_from_mysql
+
+
+class ShipinhaoSearch:
+    """Appium-driven crawler for WeChat "视频号" search results.
+
+    The class is used as a namespace: every method is a classmethod or a
+    staticmethod and no instance is created in this file.
+    """
+
+    # Running number of the video being handled. It is only used in log
+    # messages and is reset to 0 whenever search_video catches an exception.
+    i = 0
+
+    @classmethod
+    def start_wechat(cls, log_type, crawler, sheetid, env):
+        """Open an Appium session on WeChat and run the keyword search crawl.
+
+        :param log_type: forwarded to Common.logger
+        :param crawler: forwarded to Common.logger and to the config lookup
+        :param sheetid: Feishu sheet id that get_video_info writes to
+        :param env: "dev" selects the developer chromedriver path, any other
+            value selects the other hard-coded path
+        """
+        Common.logger(log_type, crawler).info('启动微信')
+        if env == "dev":
+            chromedriverExecutable = "/Users/wangkun/Downloads/chromedriver/chromedriver_v86/chromedriver"
+        else:
+            chromedriverExecutable = '/Users/piaoquan/Downloads/chromedriver'
+        # NOTE: Appium capability names are case-sensitive; "platformVersion",
+        # "unicodeKeyboard" and "resetKeyboard" were previously misspelled and
+        # therefore did not match the documented capability names.
+        caps = {
+            "platformName": "Android",  # mobile OS: Android / iOS
+            "deviceName": "Android",  # connected device name (emulator or real device); arbitrary on Android
+            "platformVersion": "11",  # OS version of the device (Android 11)
+            "appPackage": "com.tencent.mm",  # package name of the app under test
+            "appActivity": ".ui.LauncherUI",  # activity to launch
+            "autoGrantPermissions": True,  # let Appium grant the base permissions automatically;
+            # has no effect when noReset is True (Android-only capability)
+            "unicodeKeyboard": True,  # use Appium's own input method; set to True to type Chinese
+            "resetKeyboard": True,  # restore the original input method when the run ends
+            "noReset": True,  # do not reset the app state
+            "recreateChromeDriverSessions": True,  # kill the chromedriver session when leaving a
+            # chromedriver context, so it does not have to be killed by hand
+            "printPageSourceOnFailure": True,  # log the full page source when an element is not found
+            "newCommandTimeout": 6000,  # seconds Appium waits for the next command before ending the session
+            "automationName": "UiAutomator2",  # automation engine; Appium, UiAutomator2, Selendroid and
+            # Espresso are for Android, XCUITest is for iOS
+            "showChromedriverLog": True,
+            "chromeOptions": {"androidProcess": "com.tencent.mm:appbrand0"},
+            # alternative process name kept for reference: "com.tencent.mm:tools"
+            'enableWebviewDetailsCollection': True,
+            'setWebContentsDebuggingEnabled': True,
+            'chromedriverExecutable': chromedriverExecutable,
+        }
+        driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
+        try:
+            driver.implicitly_wait(10)
+
+            time.sleep(5)
+            # Search videos for every configured keyword.
+            cls.search_video(log_type=log_type,
+                             crawler=crawler,
+                             driver=driver,
+                             sheetid=sheetid,
+                             env=env)
+        finally:
+            # Always release the Appium session, otherwise it stays open on
+            # the server until newCommandTimeout expires.
+            driver.quit()
+
+    @classmethod
+    def search_elements(cls, driver: WebDriver, xpath):
+        """Look for elements matching ``xpath`` in each window handle in turn.
+
+        The driver is left switched to the first window in which a match was
+        found (or to the last window tried when nothing matched).
+
+        :return: the non-empty list of matching elements, or None when no
+            window contains a match
+        """
+        time.sleep(1)
+        windowHandles = driver.window_handles
+        for handle in windowHandles:
+            driver.switch_to.window(handle)
+            time.sleep(1)
+            try:
+                elements = driver.find_elements(By.XPATH, xpath)
+                if elements:
+                    return elements
+            except NoSuchElementException:
+                pass
+        return None
+
+    @classmethod
+    def check_to_webview(cls, log_type, crawler, driver: WebDriver, timeout=30):
+        """Switch the driver to the second reported context (the webview).
+
+        The previous implementation looped forever without an exit, so the
+        caller never continued. This version polls once per second until a
+        second context is reported or ``timeout`` seconds have passed.
+
+        :param timeout: maximum number of seconds to wait for a webview context
+        :return: True when the switch happened, False on timeout
+        """
+        deadline = time.time() + timeout
+        while True:
+            Common.logger(log_type, crawler).info('切换到webview')
+            contexts = driver.contexts
+            if len(contexts) > 1:
+                driver.switch_to.context(contexts[1])
+                return True
+            if time.time() >= deadline:
+                return False
+            time.sleep(1)
+
+    @classmethod
+    def search_video(cls, log_type, crawler, driver: WebDriver, sheetid, env):
+        """Search every configured keyword and visit the resulting videos.
+
+        For each keyword the search box is filled in, the result page is
+        opened, and the video cards are visited batch by batch until at least
+        ``max_videos`` cards have been handled. Each visited video is passed
+        to get_video_info.
+        """
+        any_card_xpath = '//*[@class="double-rich double-rich_vertical"]'
+        video_card_xpath = '//wx-view[@class="double-rich double-rich_vertical"]'
+        max_videos = 30
+        # Upper bound on back-to-back failures, so that a persistent error
+        # cannot spin the retry loop forever.
+        max_consecutive_errors = 5
+
+        word_list = get_config_from_mysql(log_type, crawler, env, "search_word", action="") or []
+        for word in word_list:
+            driver.implicitly_wait(10)
+            Common.logger(log_type, crawler).info('点击搜索按钮')
+            driver.find_element(By.ID, 'com.tencent.mm:id/j5t').click()
+            time.sleep(0.5)
+            Common.logger(log_type, crawler).info(f'输入视频标题:{word}')
+            driver.find_element(By.ID, 'com.tencent.mm:id/cd7').clear().send_keys(word)
+            Common.logger(log_type, crawler).info('点击搜索')
+            driver.press_keycode(AndroidKey.ENTER)
+            driver.find_elements(By.ID, 'com.tencent.mm:id/oi4')[0].click()
+
+            if not cls.check_to_webview(log_type, crawler, driver):
+                Common.logger(log_type, crawler).warning('未找到webview,停止搜索\n')
+                return
+
+            video_list = cls.search_elements(driver, '//div[@class="unit"]/*[2]')
+            Common.logger(log_type, crawler).info('点击"视频号"分类')
+            video_list[0].click()
+            time.sleep(5)
+
+            index = 0
+            consecutive_errors = 0
+            while True:
+                # ">=" rather than "==": index grows by a whole batch at a
+                # time and could otherwise step over the limit.
+                if index >= max_videos:
+                    Common.logger(log_type, crawler).info(f'"{word}"已抓取视频数:{index}\n')
+                    break
+                try:
+                    if cls.search_elements(driver, any_card_xpath) is None:
+                        Common.logger(log_type, crawler).info('窗口已销毁\n')
+                        return
+
+                    Common.logger(log_type, crawler).info('获取视频列表\n')
+                    video_elements = cls.search_elements(driver, video_card_xpath)
+                    if video_elements is None:
+                        Common.logger(log_type, crawler).warning(f'video_elements:{video_elements}')
+                        return
+
+                    # Only the cards that were not handled in earlier batches.
+                    video_element_temp = video_elements[index:]
+                    if len(video_element_temp) == 0:
+                        Common.logger(log_type, crawler).info('到底啦~~~~~~~~~~~~~\n')
+                        return
+
+                    for video_element in video_element_temp:
+                        if video_element is None:
+                            Common.logger(log_type, crawler).info('到底啦~\n')
+                            return
+                        cls.i += 1
+                        # The result is unused; presumably this is called for
+                        # its side effect of switching to the window that
+                        # hosts the list - TODO confirm. The XPath used here
+                        # was previously invalid (missing "class=").
+                        cls.search_elements(driver, video_card_xpath)
+
+                        Common.logger(log_type, crawler).info(f'拖动"视频"列表第{cls.i}个至屏幕中间')
+                        time.sleep(3)
+                        driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})",
+                                              video_element)
+                        Common.logger(log_type, crawler).info("点击进入视频详情")
+                        video_element.click()
+                        time.sleep(3)
+                        cls.get_video_info(log_type=log_type,
+                                           crawler=crawler,
+                                           driver=driver,
+                                           sheetid=sheetid)
+                    Common.logger(log_type, crawler).info('已抓取完一组视频,休眠10秒\n')
+                    time.sleep(10)
+                    index = index + len(video_element_temp)
+                    consecutive_errors = 0
+                except Exception as e:
+                    Common.logger(log_type, crawler).info(f"get_videoList:{e}\n")
+                    cls.i = 0
+                    consecutive_errors += 1
+                    if consecutive_errors >= max_consecutive_errors:
+                        Common.logger(log_type, crawler).warning(
+                            f'get_videoList连续失败{consecutive_errors}次,停止搜索\n')
+                        return
+
+    @staticmethod
+    def _clock_to_seconds(clock_text):
+        """Convert a "mm:ss" or "hh:mm:ss" label into a number of seconds."""
+        seconds = 0
+        for part in clock_text.split(':'):
+            seconds = seconds * 60 + int(part)
+        return seconds
+
+    @staticmethod
+    def _parse_count(count_text, placeholder):
+        """Convert a counter label into a number.
+
+        :param count_text: text of the counter, e.g. "12", "1.2万" or "10万+"
+        :param placeholder: caption shown instead of a number when the count
+            is zero (e.g. "喜欢")
+        :return: 0 for an empty/missing label or the placeholder, otherwise
+            the count as a float ("万" multiplies by 10000)
+        """
+        if not count_text or count_text == placeholder:
+            return 0
+        if '万' in count_text:
+            # Also covers the "万+" suffix: only the text before "万" is used.
+            return float(count_text.split('万')[0]) * 10000
+        return float(count_text)
+
+    @classmethod
+    def _read_count(cls, driver, element_id, placeholder):
+        """Read the 'name' attribute of a native element and parse it as a count."""
+        count_text = driver.find_element(By.ID, element_id).get_attribute('name')
+        return cls._parse_count(count_text, placeholder)
+
+    @classmethod
+    def get_video_info(cls, log_type, crawler, driver: WebDriver, sheetid):
+        """Read the statistics of the opened video and write them to Feishu.
+
+        Switches the driver to the NATIVE_APP context, taps the player, reads
+        the two time labels and the like/share/favorite/comment counters, and
+        writes one row to range F2:Z2 of the given sheet. The driver is left
+        in the NATIVE_APP context.
+        """
+        Common.logger(log_type, crawler).info('切回NATIVE_APP\n')
+        driver.switch_to.context('NATIVE_APP')
+
+        # Tap the player, then read the time labels to get the duration.
+        pause_btn = driver.find_element(By.ID, 'com.tencent.mm:id/eh4')
+        pause_btn.click()
+        start_time = cls._clock_to_seconds(
+            driver.find_element(By.ID, 'com.tencent.mm:id/l59').get_attribute('name'))
+        try:
+            end_text = driver.find_element(By.ID, 'com.tencent.mm:id/l7i').get_attribute('name')
+        except NoSuchElementException:
+            # Fall back to the alternative element id for the second label.
+            end_text = driver.find_element(By.ID, 'com.tencent.mm:id/g73').get_attribute('name')
+        end_time = cls._clock_to_seconds(end_text)
+        # NOTE(review): summing the two labels assumes they are the elapsed
+        # and the remaining time - confirm against the player UI.
+        duration = start_time + end_time
+
+        # Likes
+        like_cnt = cls._read_count(driver, 'com.tencent.mm:id/k04', "喜欢")
+        # Shares
+        share_cnt = cls._read_count(driver, 'com.tencent.mm:id/jhv', "转发")
+        # Favorites
+        favorite_cnt = cls._read_count(driver, 'com.tencent.mm:id/fnp', "收藏")
+        # Comments
+        comment_cnt = cls._read_count(driver, 'com.tencent.mm:id/bje', "评论")
+
+        # Write the video information to the Feishu feeds sheet.
+        values = [[time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(int(time.time()))),
+                   "视频号搜索",
+                   "标题",
+                   duration,
+                   like_cnt,
+                   share_cnt,
+                   favorite_cnt,
+                   comment_cnt]]
+        time.sleep(1)
+        Feishu.update_values(log_type, crawler, sheetid, 'F2:Z2', values)
+        Common.logger(log_type, crawler).info('视频信息写入飞书文档成功\n')
+
+if __name__ == '__main__':
+    # Manual entry point: run the search crawl once against the dev setup.
+    launch_kwargs = {
+        "log_type": "search",
+        "crawler": "shipinhao",
+        "sheetid": "xYWCzf",
+        "env": "dev",
+    }
+    ShipinhaoSearch.start_wechat(**launch_kwargs)