zhangyong 1 rok temu
rodzic
commit
0becb9aa4b

+ 4 - 0
application/pipeline/pipeline.py

@@ -135,6 +135,10 @@ class PiaoQuanPipeline(object):
         if self.mode == "recommend" and self.platform == "yuannifuqichangzai":
             title = self.item["video_title"]
             sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}" and video_title="{title}"; """
+        # For this platform, also match on video title (in addition to out_video_id) when checking for duplicates
+        elif self.mode == "recommend" and self.platform == "zhufuhaoyunbaofu":
+            title = self.item["video_title"]
+            sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}" and video_title="{title}"; """
         else:
             sql = f""" select 1 from crawler_video where platform = "{self.platform}" and out_video_id="{out_id}"; """
         repeat_video = self.mysql.select(sql=sql)

+ 2 - 15
spider/crawler_offline/zhufuhaoyunbaofu.py

@@ -20,7 +20,7 @@ from selenium.webdriver.common.by import By
 sys.path.append(os.getcwd())
 
 from application.functions import get_redirect_url
-from application.pipeline import PiaoQuanPipelineTest
+from application.pipeline import PiaoQuanPipelineTest, PiaoQuanPipeline
 from application.common.log import AliyunLogger, Local
 from application.common import MysqlHelper, Feishu
 
@@ -133,16 +133,6 @@ class ZFHYBFRecommend(object):
         time.sleep(1)
         self.driver.quit()
 
-
-    def repeat_video(self, out_video_id, video_title):
-        sql = f""" select * from crawler_video where platform = "{self.platform}" and out_video_id="{out_video_id}" and video_title="{video_title}"; """
-        repeat_video = MysqlHelper.select(sql=sql)
-        if repeat_video:
-            message = "重复的视频"
-            print(message)
-            return False
-        return True
-
     def search_elements(self, xpath):
         time.sleep(1)
         windowHandles = self.driver.window_handles
@@ -237,9 +227,6 @@ class ZFHYBFRecommend(object):
             return
         out_video_id = md5(video_title.encode("utf8")).hexdigest()
         out_user_id = md5(user_name.encode("utf8")).hexdigest()
-        repeat_video = self.repeat_video(out_video_id, video_title)
-        if repeat_video == False:
-            return
         video_dict = {
             "video_title": video_title,
             "video_id": out_video_id,
@@ -263,7 +250,7 @@ class ZFHYBFRecommend(object):
             "cover_url": cover_url,
             "session": f"zhufuhaoyunbaofu-{int(time.time())}",
         }
-        pipeline = PiaoQuanPipelineTest(
+        pipeline = PiaoQuanPipeline(
             platform=self.crawler,
             mode=self.log_type,
             item=video_dict,