wangkun 1 jaar geleden
bovenliggende
commit
2ab07933b1
1 gewijzigde bestanden met toevoegingen van 23 en 24 verwijderingen
  1. 23 24
      xigua/xigua_search/xigua_search_scheduling.py

+ 23 - 24
xigua/xigua_search/xigua_search_scheduling.py

@@ -567,24 +567,24 @@ class XiguasearchScheduling:
         Common.logger(log_type, crawler).info(f"打开搜索页:{user_dict['link']}")
         Common.logger(log_type, crawler).info(f"打开搜索页:{user_dict['link']}")
         driver.get(f"https://www.ixigua.com/search/{user_dict['link']}/")
         driver.get(f"https://www.ixigua.com/search/{user_dict['link']}/")
         time.sleep(3)
         time.sleep(3)
-        driver.get_screenshot_as_file(f"./{crawler}/logs/打开搜索页.jpg")
-        if len(driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]')) != 0:
-            driver.find_element(By.XPATH, '//*[@class="xg-notification-close"]').click()
-            time.sleep(1)
-        Common.logger(log_type, crawler).info("点击筛选")
-        driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
-        time.sleep(1)
-        Common.logger(log_type, crawler).info("点击最新排序")
-        driver.find_element(By.XPATH, '//*[@class="searchPageV2-category__wrapper"]/*[2]/*[1]').click()
-        time.sleep(5)
-        Common.logger(log_type, crawler).info("收回筛选")
-        driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
-        time.sleep(1)
-        # 点击列表形式//div[@class="searchPageV2__header-icons"]/*[3]
-        Common.logger(log_type, crawler).info("点击列表形式展示")
-        driver.find_element(By.XPATH, '//div[@class="searchPageV2__header-icons"]/*[3]').click()
-        time.sleep(3)
-        driver.get_screenshot_as_file(f"./{crawler}/logs/已点击最新排序.jpg")
+        # driver.get_screenshot_as_file(f"./{crawler}/logs/打开搜索页.jpg")
+        # if len(driver.find_elements(By.XPATH, '//*[@class="xg-notification-close"]')) != 0:
+        #     driver.find_element(By.XPATH, '//*[@class="xg-notification-close"]').click()
+        #     time.sleep(1)
+        # Common.logger(log_type, crawler).info("点击筛选")
+        # driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
+        # time.sleep(1)
+        # Common.logger(log_type, crawler).info("点击最新排序")
+        # driver.find_element(By.XPATH, '//*[@class="searchPageV2-category__wrapper"]/*[2]/*[1]').click()
+        # time.sleep(5)
+        # Common.logger(log_type, crawler).info("收回筛选")
+        # driver.find_element(By.XPATH, '//*[@class="searchPageV2__header-icons-categories"]').click()
+        # time.sleep(1)
+        # # 点击列表形式//div[@class="searchPageV2__header-icons"]/*[3]
+        # Common.logger(log_type, crawler).info("点击列表形式展示")
+        # driver.find_element(By.XPATH, '//div[@class="searchPageV2__header-icons"]/*[3]').click()
+        # time.sleep(3)
+        # driver.get_screenshot_as_file(f"./{crawler}/logs/已点击最新排序.jpg")
 
 
         index = 0
         index = 0
         num = 0
         num = 0
@@ -614,8 +614,7 @@ class XiguasearchScheduling:
                 driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
                 driver.execute_script("arguments[0].scrollIntoView({block:'center',inline:'center'})", video_element)
                 time.sleep(3)
                 time.sleep(3)
                 driver.get_screenshot_as_file(f"./{crawler}/logs/{num}.jpg")
                 driver.get_screenshot_as_file(f"./{crawler}/logs/{num}.jpg")
-                # item_id = video_element.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]')[index+i].get_attribute('href')
-                item_id = driver.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]')[index+i].get_attribute('href')
+                item_id = video_element.find_elements(By.XPATH, '//*[@class="HorizontalFeedCard__coverWrapper disableZoomAnimation"]')[index+i].get_attribute('href')
                 item_id = item_id.split("com/")[-1].split("?&")[0]
                 item_id = item_id.split("com/")[-1].split("?&")[0]
                 video_dict = cls.get_video_info(log_type, crawler, item_id)
                 video_dict = cls.get_video_info(log_type, crawler, item_id)
                 if video_dict is None:
                 if video_dict is None:
@@ -623,10 +622,10 @@ class XiguasearchScheduling:
                     continue
                     continue
                 for k, v in video_dict.items():
                 for k, v in video_dict.items():
                     Common.logger(log_type, crawler).info(f"{k}:{v}")
                     Common.logger(log_type, crawler).info(f"{k}:{v}")
-                if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
-                    Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
-                    driver.quit()
-                    return
+                # if int((int(time.time()) - int(video_dict["publish_time_stamp"])) / (3600 * 24)) > int(rule_dict.get("period", {}).get("max", 1000)):
+                #     Common.logger(log_type, crawler).info(f'发布时间超过{int(rule_dict.get("period", {}).get("max", 1000))}天\n')
+                #     driver.quit()
+                #     return
                 if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                 if download_rule(log_type=log_type, crawler=crawler, video_dict=video_dict, rule_dict=rule_dict) is False:
                     Common.logger(log_type, crawler).info("不满足抓取规则\n")
                     Common.logger(log_type, crawler).info("不满足抓取规则\n")
                 elif any(str(word) if str(word) in video_dict["video_title"] else False
                 elif any(str(word) if str(word) in video_dict["video_title"] else False