luojunhui преди 4 месеца
родител
ревизия
e00be6894a
променени са 1 файла, в които са добавени 19 реда и са изтрити 0 реда
  1. 19 0
      tasks/crawler_channel_account_videos.py

+ 19 - 0
tasks/crawler_channel_account_videos.py

@@ -26,6 +26,22 @@ class CrawlerChannelAccountVideos:
         self.db_client.connect()
         self.success_crawler_video_count = 0
 
+    def whether_video_exists(self, title: str) -> bool:
+        """
+        Return True (after printing a notice) if publish_single_video_source has a row whose article_title equals `title`, else False; only the title is checked, video_id is not used.
+        """
+        # check title: the value is handed to fetch() through params, not formatted into the SQL (NOTE(review): the f-prefix below has no placeholders)
+        sql = f"""
+            select id from publish_single_video_source
+            where article_title = %s;
+        """
+        duplicate_id = self.db_client.fetch(query=sql, params=(title,))
+        if duplicate_id:  # a truthy fetch result is treated as "already stored"
+            print(title + " video exists")
+            return True
+
+        return False
+
     def get_channel_account_list(self):
         """
         get channel account list from database
@@ -48,6 +64,9 @@ class CrawlerChannelAccountVideos:
                 object_desc = video['objectDesc']
                 publish_timestamp = video['createtime']
                 title = object_desc['description']
+                if self.whether_video_exists(title):
+                    continue
+
                 media = object_desc['media'][0]
                 url = media['Url']
                 decode_key = media['decodeKey']