|
@@ -26,6 +26,22 @@ class CrawlerChannelAccountVideos:
|
|
|
self.db_client.connect()
|
|
|
self.success_crawler_video_count = 0
|
|
|
|
|
|
def whether_video_exists(self, title: str) -> bool:
    """
    Report whether a row with this title is already present.

    Looks up ``publish_single_video_source`` by ``article_title`` only;
    no video id takes part in the check.

    :param title: title to look for; passed to the database as a bound
        parameter, never interpolated into the SQL text.
    :return: True when the lookup returns at least one row, else False.
    """
    # Plain string literal: the only placeholder is the driver-level %s,
    # so an f-string prefix would add nothing.
    sql = """
        select id from publish_single_video_source
        where article_title = %s;
    """
    duplicate_id = self.db_client.fetch(query=sql, params=(title,))
    if not duplicate_id:
        return False

    # Format instead of concatenating so that a non-str title cannot raise
    # TypeError after the duplicate has already been detected.
    print(f"{title} video exists")
    return True
|
|
|
+
|
|
|
def get_channel_account_list(self):
|
|
|
"""
|
|
|
get channel account list from database
|
|
@@ -48,6 +64,9 @@ class CrawlerChannelAccountVideos:
|
|
|
object_desc = video['objectDesc']
|
|
|
publish_timestamp = video['createtime']
|
|
|
title = object_desc['description']
|
|
|
+ if self.whether_video_exists(title):
|
|
|
+ continue
|
|
|
+
|
|
|
media = object_desc['media'][0]
|
|
|
url = media['Url']
|
|
|
decode_key = media['decodeKey']
|