wangkun 2 年 前
コミット
9c18c5770b
3 ファイル変更、131 行追加、150 行削除
  1. +4 -0
      README.md
  2. +2 -3
      main/run_zqttk_recommend.py
  3. +125 -147
      main/zqttk_recommend.py

+ 4 - 0
README.md

@@ -3,6 +3,7 @@
 1. git:https://git.yishihui.com/Server/crawler_zhiqingzongqun.git
 2. feishu:https://w42nne6hzg.feishu.cn/sheets/shtcnjmhKdJOKdqnEzJcZb5xaHc?
 
+
 #### 软件架构
 
 1. python==3.10
@@ -14,12 +15,15 @@
 7. selenium==4.4.3
 8. urllib3==1.26.9
 
+
 #### 使用说明
 
 1. cd ./crawler_zhiqingzongqun
 2. python3 ./main/run_xx.py
 
+
 #### 需求
+
 2022/10/19
 1. 抓取小程序更换: 知青天天看
 2. 抓取时间: 17:00 - 8:00

+ 2 - 3
main/run_zhiqingzongqun_recommend.py → main/run_zqttk_recommend.py

@@ -6,16 +6,15 @@ import os
 import sys
 sys.path.append(os.getcwd())
 from main.common import Common
-from main.zhiqingzongqun_recommend import Recommend
+from main.zqttk_recommend import Recommend
 
 
 class Main:
     @classmethod
     def main(cls, log_type, env):
         while True:
-            # if datetime.datetime.now().hour >= 0:
             if 23 >= datetime.datetime.now().hour >= 17 or datetime.datetime.now().hour <= 8:
-                Common.logger(log_type).info('开始抓取知青总群\n')
+                Common.logger(log_type).info('开始抓取知青天天看\n')
                 Recommend.start_wechat(log_type, env)
                 Recommend.i = 0
                 Common.del_logs(log_type)

+ 125 - 147
main/zhiqingzongqun_recommend.py → main/zqttk_recommend.py

@@ -11,9 +11,6 @@ from appium.webdriver.common.touch_action import TouchAction
 from appium.webdriver.webdriver import WebDriver
 from selenium.common.exceptions import NoSuchElementException
 from selenium.webdriver.common.by import By
-# from selenium.webdriver.support.wait import WebDriverWait
-# from selenium.webdriver.support import expected_conditions as EC
-
 sys.path.append(os.getcwd())
 from main.common import Common
 from main.publish import Publish
@@ -102,10 +99,8 @@ class Recommend:
             driver.swipe(int(size['width'] * 0.5), int(size['height'] * 0.2), int(size['width'] * 0.5),
                          int(size['height'] * 0.8), 200)
             # 打开小程序"知青总群"
-            time.sleep(3)
-            # Common.logger(log_type).info('打开小程序"知青总群"')
+            time.sleep(5)
             Common.logger(log_type).info('打开小程序"知青天天看"')
-            # driver.find_elements(By.XPATH, '//*[@text="知青总群"]')[-1].click()
             driver.find_elements(By.XPATH, '//*[@text="知青天天看"]')[-1].click()
             # 获取视频信息
             time.sleep(1)
@@ -168,6 +163,7 @@ class Recommend:
                 except Exception as e:
                     Common.logger(log_type).warning('切换到小程序失败,重启APP:{}\n', e)
                     cls.quit(log_type, driver)
+                    cls.i = 0
                     cls.start_wechat(log_type, env)
 
             # Common.logger(log_type).info('点击"换"按钮')
@@ -180,12 +176,21 @@ class Recommend:
             Common.logger(log_type).info('获取推荐列表视频信息')
             while True:
                 cls.i += 1
-                Common.logger(log_type).info('正在获取第{}条视频信息', cls.i)
                 recommend_handles = driver.window_handles
                 for recommend_handle in recommend_handles:
                     try:
                         driver.switch_to.window(recommend_handle)
 
+                        # ad
+                        try:
+                            ad = driver.find_element(
+                                By.XPATH,
+                                '//*[@class="videolistbox videolist--videolistbox"]'
+                                '/*[' + str(cls.i) + ']//*[@class="ad-_banner-_-full"]'
+                            )
+                        except NoSuchElementException:
+                            ad = 0
+
                         # video_title
                         try:
                             title = driver.find_element(
@@ -193,7 +198,7 @@ class Recommend:
                                 '//*[@class="videolistbox videolist--videolistbox"]'
                                 '/*[' + str(cls.i) + ']//*[@class="video_title videolist--video_title"]')
                             # 向上滚动至-元素可见
-                            Common.logger(log_type).info('滑动视频标题至屏幕中间')
+                            # Common.logger(log_type).info('滑动视频标题至屏幕中间')
                             driver.execute_script(
                                 "arguments[0].scrollIntoView({block:'center',inline:'center'})", title)
                             video_title = title.get_attribute('innerHTML')
@@ -222,59 +227,143 @@ class Recommend:
                             # Common.logger(log_type).error('cover_url异常:{}', e)
                             cover_url = 0
 
-                        # video_url
-                        if title == 0 or video_title == 0:
-                            video_url = 0
-                        elif '精美图文' in video_title:
-                            video_url = 0
+                        if ad != 0:
+                            Common.logger(log_type).info('正在获取第{}条:广告\n', cls.i)
+                            break
+                        elif video_title == 0:
+                            pass
                         else:
-                            video_url = cls.get_url(log_type, driver, video_title, title)
-
-                        Common.logger(log_type).info('video_title:{}', video_title)
-                        Common.logger(log_type).info('play_cnt:{}', play_cnt)
-                        Common.logger(log_type).info('video_url:{}', video_url)
+                            Common.logger(log_type).info('正在获取第{}条:{}', cls.i, video_title)
 
-                        if video_title == 0 or cover_url == 0 or video_url == 0:
+                        if video_title == 0 or cover_url == 0:
                             Common.logger(log_type).info('无效视频\n')
                         elif '精美图文' in video_title:
                             Common.logger(log_type).info('精美图文\n')
                         elif any(word if word in video_title else False for word in cls.filter_words(log_type)) is True:
                             Common.logger(log_type).info('视频已中过滤词:{}\n', video_title)
-                            driver.press_keycode(4)
+                            # driver.press_keycode(4)
                         elif video_title in [x for y in Feishu.get_values_batch(
                                 log_type, 'zhiqingzongqun', 'Z48hlq') for x in y]:
                             Common.logger(log_type).info('视频已存在\n')
-                            driver.press_keycode(4)
+                            # driver.press_keycode(4)
                         elif video_title in [x for y in Feishu.get_values_batch(
                                 log_type, 'zhiqingzongqun', '1a88b3') for x in y]:
                             Common.logger(log_type).info('视频已下载\n')
-                            driver.press_keycode(4)
+                            # driver.press_keycode(4)
                         else:
-                            Feishu.insert_columns(log_type, 'zhiqingzongqun', 'Z48hlq', 'ROWS', 1, 2)
-                            get_feeds_time = int(time.time())
-                            values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
-                                       '推荐榜',
-                                       str(video_title),
-                                       play_cnt,
-                                       cover_url,
-                                       video_url]]
-                            time.sleep(1)
-                            Feishu.update_values(log_type, 'zhiqingzongqun', 'Z48hlq', 'A2:Z2', values)
-                            Common.logger(log_type).info('视频信息写入飞书文档成功\n')
-                            driver.press_keycode(4)
+                            # video_url
+                            video_url = cls.get_url(log_type, driver, video_title, title)
 
-                        cls.download_publish(log_type, env)
+                            Common.logger(log_type).info('play_cnt:{}', play_cnt)
+                            Common.logger(log_type).info('video_url:{}', video_url)
+
+                            # 下载视频
+                            Common.download_method(log_type, 'video', video_title, video_url)
+                            # 获取视频时长
+                            video_info = cls.get_video_info_from_local(
+                                "./videos/" + video_title + "/video.mp4")
+                            download_width = str(video_info[0])
+                            download_height = str(video_info[1])
+                            download_duration = video_info[2]
+                            # 视频时长<60s,直接删除
+                            if int(download_duration) < 60:
+                                # 删除视频文件夹
+                                shutil.rmtree("./videos/" + video_title + "/")
+                                Common.logger(log_type).info("时长:{}<60秒,删除成功\n", int(download_duration))
+                                return
+                            else:
+                                # 下载封面
+                                Common.download_method(log_type, 'cover', video_title, cover_url)
+                                # 保存视频信息至 "./videos/{download_video_title}/info.txt"
+                                with open("./videos/" + video_title
+                                          + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
+                                    f_a.write(str(int(time.time())) + "\n" +
+                                              str(video_title) + "\n" +
+                                              str(int(download_duration)) + "\n" +
+                                              str(int(float(
+                                                  play_cnt.split(' ')[-1].split('万')[0]) * 10000)) + "\n" +
+                                              '0' + "\n" +
+                                              '0' + "\n" +
+                                              '0' + "\n" +
+                                              str(download_width) + '*' + str(download_height) + "\n" +
+                                              str(int(time.time())) + "\n" +
+                                              '知青天天看' + "\n" +
+                                              str(cover_url) + "\n" +
+                                              str(video_url) + "\n" +
+                                              str(cover_url) + "\n" +
+                                              "zhiqingzongqun" + str(int(time.time())))
+                                Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
+
+                                # 上传视频
+                                Common.logger(log_type).info("开始上传视频:{}".format(video_title))
+                                if env == 'dev' and int(download_width) >= int(download_height):
+                                    our_video_id = Publish.upload_and_publish(log_type, env, "width")
+                                    our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
+                                        our_video_id) + "/info"
+                                elif env == 'dev' and int(download_width) < int(download_height):
+                                    our_video_id = Publish.upload_and_publish(log_type, env, "height")
+                                    our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
+                                        our_video_id) + "/info"
+                                elif env == 'prod' and int(download_width) >= int(download_height):
+                                    our_video_id = Publish.upload_and_publish(log_type, env, "width")
+                                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
+                                        our_video_id) + "/info"
+                                elif env == 'prod' and int(download_width) < int(download_height):
+                                    our_video_id = Publish.upload_and_publish(log_type, env, "height")
+                                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
+                                        our_video_id) + "/info"
+                                else:
+                                    our_video_id = Publish.upload_and_publish(log_type, env, "width")
+                                    our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
+                                        our_video_id) + "/info"
+                                Common.logger(log_type).info("视频上传完成:{}", video_title)
+
+                                # 保存视频 ID 到已下载表
+                                Common.logger(log_type).info("保存视频至已下载表:{}", video_title)
+                                # 视频ID工作表,插入首行
+                                Feishu.insert_columns(log_type, "zhiqingzongqun", "1a88b3", "ROWS", 1, 2)
+                                # 视频ID工作表,首行写入数据
+                                upload_time = int(time.time())
+                                values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
+                                           "推荐榜",
+                                           video_title,
+                                           our_video_link,
+                                           play_cnt,
+                                           int(download_duration),
+                                           str(download_width) + '*' + str(download_height),
+                                           cover_url,
+                                           video_url]]
+                                time.sleep(1)
+                                Feishu.update_values(log_type, "zhiqingzongqun", "1a88b3", "F2:V2", values)
+                                Common.logger(log_type).info("视频:{},下载/上传成功\n", video_title)
+                                driver.press_keycode(4)
+
+                            # Feishu.insert_columns(log_type, 'zhiqingzongqun', 'Z48hlq', 'ROWS', 1, 2)
+                            # get_feeds_time = int(time.time())
+                            # values = [[time.strftime('%Y/%m/%d %H:%M:%S', time.localtime(get_feeds_time)),
+                            #            '推荐榜',
+                            #            str(video_title),
+                            #            play_cnt,
+                            #            cover_url,
+                            #            video_url]]
+                            # time.sleep(1)
+                            # Feishu.update_values(log_type, 'zhiqingzongqun', 'Z48hlq', 'A2:Z2', values)
+                            # Common.logger(log_type).info('视频信息写入飞书文档成功\n')
+                            # driver.press_keycode(4)
+                            # cls.download_publish(log_type, env)
+                            break
                     except Exception:
                         # Common.logger(log_type).error('switch_to.window(recommend_handle)异常:{}', e)
                         pass
 
-                if cls.i == 200:
+                if cls.i == 2000:
                     cls.i = 0
                     break
 
         except Exception as e:
             Common.logger(log_type).error('get_recommend异常:{},重启 APP\n', e)
             cls.quit(log_type, driver)
+            cls.i = 0
             cls.start_wechat(log_type, env)
 
     @classmethod
@@ -302,117 +391,6 @@ class Recommend:
         except Exception as e:
             Common.logger(log_type).error('get_url异常:{}\n', e)
 
-    @classmethod
-    def download_publish(cls, log_type, env):
-        try:
-            recommend_sheet = Feishu.get_values_batch(log_type, 'zhiqingzongqun', 'Z48hlq')
-            for i in range(1, len(recommend_sheet)):
-                download_video_title = recommend_sheet[i][2]
-                download_play_cnt = recommend_sheet[i][3]
-                download_cover_url = recommend_sheet[i][4]
-                download_video_url = recommend_sheet[i][5]
-                download_comment_cnt = 0
-                download_like_cnt = 0
-                download_share_cnt = 0
-                download_user_name = '知青天天看'
-                download_head_url = download_cover_url
-
-                if download_video_title is None or download_video_url is None:
-                    Feishu.dimension_range(log_type, 'zhiqingqongqun', 'Z48hlq', 'ROWS', i + 1, i + 1)
-                    Common.logger(log_type).info('空行,删除成功\n')
-                elif download_video_title in [x for y in Feishu.get_values_batch(log_type, 'zhiqingzongqun', '1a88b3')
-                                              for x in y]:
-                    Feishu.dimension_range(log_type, 'zhiqingqongqun', 'Z48hlq', 'ROWS', i + 1, i + 1)
-                    Common.logger(log_type).info('视频已下载,删除成功\n')
-                else:
-                    # 下载视频
-                    Common.download_method(log_type, 'video', download_video_title, download_video_url)
-                    # 获取视频时长
-                    video_info = cls.get_video_info_from_local("./videos/" + download_video_title + "/video.mp4")
-                    download_width = str(video_info[0])
-                    download_height = str(video_info[1])
-                    download_duration = video_info[2]
-                    # 视频时长<60s,直接删除
-                    if int(download_duration) < 60:
-                        # 删除视频文件夹
-                        shutil.rmtree("./videos/" + download_video_title + "/")
-                        # 删除云文档recommend_feeds中的记录
-                        Feishu.dimension_range(log_type, "zhiqingzongqun", "Z48hlq", "ROWS", i + 1, i + 1)
-                        Common.logger(log_type).info("时长:{}<60秒,删除成功\n", int(download_duration))
-                        return
-                    else:
-                        # 下载封面
-                        Common.download_method(log_type, 'cover', download_video_title, download_cover_url)
-                        # 保存视频信息至 "./videos/{download_video_title}/info.txt"
-                        with open("./videos/" + download_video_title
-                                  + "/" + "info.txt", "a", encoding="UTF-8") as f_a:
-                            f_a.write(str(int(time.time())) + "\n" +
-                                      str(download_video_title) + "\n" +
-                                      str(int(download_duration)) + "\n" +
-                                      str(int(float(download_play_cnt.split(' ')[-1].split('万')[0])*10000)) + "\n" +
-                                      str(download_comment_cnt) + "\n" +
-                                      str(download_like_cnt) + "\n" +
-                                      str(download_share_cnt) + "\n" +
-                                      str(download_width)+'*'+str(download_height) + "\n" +
-                                      str(int(time.time())) + "\n" +
-                                      str(download_user_name) + "\n" +
-                                      str(download_head_url) + "\n" +
-                                      str(download_video_url) + "\n" +
-                                      str(download_cover_url) + "\n" +
-                                      "zhiqingzongqun"+str(int(time.time())))
-                        Common.logger(log_type).info("==========视频信息已保存至info.txt==========")
-
-                        # 上传视频
-                        Common.logger(log_type).info("开始上传视频:{}".format(download_video_title))
-                        if env == 'dev' and int(download_width) >= int(download_height):
-                            our_video_id = Publish.upload_and_publish(log_type, env, "width")
-                            our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
-                                our_video_id) + "/info"
-                        elif env == 'dev' and int(download_width) < int(download_height):
-                            our_video_id = Publish.upload_and_publish(log_type, env, "height")
-                            our_video_link = "https://testadmin.piaoquantv.com/cms/post-detail/" + str(
-                                our_video_id) + "/info"
-                        elif env == 'prod' and int(download_width) >= int(download_height):
-                            our_video_id = Publish.upload_and_publish(log_type, env, "width")
-                            our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
-                                our_video_id) + "/info"
-                        elif env == 'prod' and int(download_width) < int(download_height):
-                            our_video_id = Publish.upload_and_publish(log_type, env, "height")
-                            our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
-                                our_video_id) + "/info"
-                        else:
-                            our_video_id = Publish.upload_and_publish(log_type, env, "width")
-                            our_video_link = "https://admin.piaoquantv.com/cms/post-detail/" + str(
-                                our_video_id) + "/info"
-                        Common.logger(log_type).info("视频上传完成:{}", download_video_title)
-
-                        # 保存视频 ID 到已下载表
-                        Common.logger(log_type).info("保存视频至已下载表:{}", download_video_title)
-                        # 视频ID工作表,插入首行
-                        Feishu.insert_columns(log_type, "zhiqingzongqun", "1a88b3", "ROWS", 1, 2)
-                        # 视频ID工作表,首行写入数据
-                        upload_time = int(time.time())
-                        values = [[time.strftime("%Y/%m/%d %H:%M:%S", time.localtime(upload_time)),
-                                   "推荐榜",
-                                   download_video_title,
-                                   our_video_link,
-                                   download_play_cnt,
-                                   int(download_duration),
-                                   str(download_width)+'*'+str(download_height),
-                                   download_cover_url,
-                                   download_video_url]]
-                        time.sleep(1)
-                        Feishu.update_values(log_type, "zhiqingzongqun", "1a88b3", "F2:V2", values)
-
-                        # 删除行或列,可选 ROWS、COLUMNS
-                        Feishu.dimension_range(log_type, "zhiqingzongqun", "Z48hlq", "ROWS", i + 1, i + 1)
-                        Common.logger(log_type).info("视频:{},下载/上传成功\n", download_video_title)
-                        return
-
-        except Exception as e:
-            Feishu.dimension_range(log_type, "zhiqingzongqun", "Z48hlq", "ROWS", 2, 2)
-            Common.logger(log_type).error('download_publish异常,删除成功:{}\n', e)
-
 
 if __name__ == '__main__':
     # Recommend.start_wechat('recommend', 'prod')