wangkun 2 年之前
父节点
当前提交
11a41fa4e0
共有 7 个文件被更改,包括 34 次插入和 23 次删除
  1. 4 1
      README.md
  2. 二进制
      chlsfiles/.DS_Store
  3. 0 0
      chlsfiles/charles202209191324.chlsj
  4. 7 7
      main/run_shipinhao_recommend.py
  5. 5 7
      main/shipinhao_get_url.py
  6. 18 3
      main/shipinhao_recommend.py
  7. 0 5
      main/shipinhao_topic.py

+ 4 - 1
README.md

@@ -24,7 +24,10 @@
 
 
 #### 需求
-2022/9//27
+2022/10/11
+1. 推荐榜,每日入库条数限制:100
+
+2022/9/27
 1. 新增按照话题抓取
 
 2022/9/14

二进制
chlsfiles/.DS_Store


文件差异内容过多而无法显示
+ 0 - 0
chlsfiles/charles202209191324.chlsj


+ 7 - 7
main/run_shipinhao_recommend.py

@@ -6,10 +6,7 @@ import os
 import sys
 import time
 sys.path.append(os.getcwd())
-# from crawler_shipinhao.main.common import Common
-# from crawler_shipinhao.main.shipinhao_recommend import Recommend
 from main.common import Common
-from main.download_publish import Download
 from main.shipinhao_recommend import Recommend
 
 
@@ -18,10 +15,13 @@ class Main:
     def main(cls, env):
         while True:
             if 20 >= datetime.datetime.now().hour >= 10:
-                Recommend.start_wechat('recommend')
-                Download.run_download_publish('recommend', env)
-                Common.del_logs('recommend')
-                time.sleep(5)
+                if len(Recommend.download_cnt) >= 100:
+                    Recommend.download_cnt = []
+                    Common.del_logs('recommend')
+                    time.sleep(3600 * (24-datetime.datetime.now().hour))
+                else:
+                    Recommend.start_wechat('recommend', env)
+                    time.sleep(3)
             else:
                 pass
 

+ 5 - 7
main/shipinhao_get_url.py

@@ -42,14 +42,13 @@ class GetUrl:
                 desired_capabilities=desired_caps)
             driver.implicitly_wait(10)
 
-            Common.logger(log_type).info('点击"聊天窗口"')
-            driver.find_element(By.NAME, '聊天').click()
-
-            Common.logger(log_type).info('点击"爬虫群"')
-            driver.find_elements(By.NAME, '爬虫群')[0].click()
+            # Common.logger(log_type).info('点击"聊天窗口"')
+            # driver.find_element(By.NAME, '聊天').click()
+            #
+            # Common.logger(log_type).info('点击"爬虫群"')
+            # driver.find_elements(By.NAME, '爬虫群')[0].click()
 
             Common.logger(log_type).info('点击视频')
-            # driver.find_elements(By.ID, '42.131546.3.3981')[-1].click()
             driver.find_elements(By.NAME, '消息')[-1].click()
 
             Common.logger(log_type).info('退出视频号')
@@ -124,7 +123,6 @@ class GetUrl:
                     Common.logger(log_type).info('等待 2s')
                     time.sleep(2)
                     Common.logger(log_type).info('获取视频头像/封面/播放地址')
-                    # print('获取视频头像/封面/播放地址')
                     urls = cls.get_url(log_type)
                     if urls == '未找到url':
                         time.sleep(1)

+ 18 - 3
main/shipinhao_recommend.py

@@ -14,10 +14,12 @@ from main.feishu_lib import Feishu
 
 
 class Recommend:
+    # 当日已下载数量
+    download_cnt = []
 
     # 启动微信,并打开视频号
     @classmethod
-    def start_wechat(cls, log_type):
+    def start_wechat(cls, log_type, env):
         try:
             Common.logger(log_type).info('启动微信')
             caps = {
@@ -46,7 +48,7 @@ class Recommend:
             driver.find_elements(By.ID, 'com.tencent.mm:id/gv6')[1].click()
             time.sleep(5)
 
-            cls.get_feeds(log_type, driver)
+            cls.get_feeds(log_type, driver, env)
 
             Common.logger(log_type).info('休眠 3s')
             time.sleep(3)
@@ -83,7 +85,7 @@ class Recommend:
 
     # 操作安卓手机,自己滑动首页视频,并获取视频信息
     @classmethod
-    def get_feeds(cls, log_type, driver: WebDriver):
+    def get_feeds(cls, log_type, driver: WebDriver, env):
         try:
             driver.implicitly_wait(10)
             for i in range(5):
@@ -220,8 +222,19 @@ class Recommend:
                             Common.logger(log_type).info('URL 信息已更新,滑动到下一个视频\n')
                             driver.swipe(10, 1600, 10, 300, 200)
                             break
+
+                # # 下载该视频
+                # if len(cls.download_cnt) >= 100:
+                #     Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", 2, 2)
+                #     return
+                # else:
+                #     cls.download_publish(log_type, env)
+
+                cls.download_publish(log_type, env)
+
         except Exception as e:
             Common.logger(log_type).error('get_feeds异常,滑动到下一个视频\n', e)
+            Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", 2, 2)
             driver.swipe(10, 1600, 10, 300, 200)
 
     # 下载 、上传
@@ -313,6 +326,8 @@ class Recommend:
                     time.sleep(1)
                     Feishu.update_values(log_type, "shipinhao", "c77cf9", "F2:V2", values)
 
+                    cls.download_cnt.append(download_title)
+
                     # 删除行或列,可选 ROWS、COLUMNS
                     time.sleep(1)
                     Feishu.dimension_range(log_type, "shipinhao", "FSDlBy", "ROWS", i + 1, i + 1)

+ 0 - 5
main/shipinhao_topic.py

@@ -81,7 +81,6 @@ class Topic:
                 'enableWebviewDetailsCollection': True,
                 'setWebContentsDebuggingEnabled': True
             }
-            # global driver
             driver = webdriver.Remote("http://localhost:4723/wd/hub", caps)
             driver.implicitly_wait(10)
 
@@ -201,10 +200,7 @@ class Topic:
                 Common.logger(log_type).info('video_title:{}', video_title)
                 Common.logger(log_type).info('user_name:{}', user_name)
                 Common.logger(log_type).info('duration:{}', duration)
-                # Common.logger(log_type).info('like_cnt:{}', like_cnt)
-                # Common.logger(log_type).info('share_cnt:{}', share_cnt)
                 Common.logger(log_type).info('favorite_cnt:{}', favorite_cnt)
-                # Common.logger(log_type).info('comment_cnt:{}', comment_cnt)
 
                 # 判断无效视频
                 if video_title == '' or user_name == '':
@@ -295,7 +291,6 @@ class Topic:
             Common.logger(log_type).info('查找所有搜索历史')
             search_words = driver.find_elements(By.XPATH, '//*[@class="history__item__text"]')
             for search_word in search_words:
-                # print(f'search_word:{search_word.text}')
                 if search_word.text == topic:
                     search_word.click()
 

部分文件因为文件数量过多而无法显示