|
@@ -160,8 +160,12 @@ class WeixinVideoCrawler(object):
|
|
if self.is_downloaded(url_unique):
|
|
if self.is_downloaded(url_unique):
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
+ title = article.get("Title", None)
|
|
|
|
+ if not title:
|
|
|
|
+ continue
|
|
|
|
+
|
|
# 判断标题是否重复
|
|
# 判断标题是否重复
|
|
- if video_crawler_duplicate_filter(article_url, self.db_client):
|
|
|
|
|
|
+ if video_crawler_duplicate_filter(title, self.db_client):
|
|
log(
|
|
log(
|
|
task='weixin_video_crawler',
|
|
task='weixin_video_crawler',
|
|
function="insert_msg_list",
|
|
function="insert_msg_list",
|
|
@@ -174,7 +178,6 @@ class WeixinVideoCrawler(object):
|
|
download_path = functions.download_gzh_video(article_url)
|
|
download_path = functions.download_gzh_video(article_url)
|
|
if download_path:
|
|
if download_path:
|
|
oss_path = functions.upload_to_oss(local_video_path=download_path)
|
|
oss_path = functions.upload_to_oss(local_video_path=download_path)
|
|
- title = article.get("Title", None)
|
|
|
|
position = article.get("ItemIndex", None)
|
|
position = article.get("ItemIndex", None)
|
|
cover_url = article.get("CoverImgUrl", None)
|
|
cover_url = article.get("CoverImgUrl", None)
|
|
show_desc = article.get("ShowDesc", None)
|
|
show_desc = article.get("ShowDesc", None)
|