Przeglądaj źródła

视频号抓取任务

luojunhui 7 miesięcy temu
rodzic
commit
c36895b401
1 zmieniony plik z 5 dodaniami i 2 usunięciami
  1. 5 2
      coldStartTasks/crawler/weixin_video_crawler.py

+ 5 - 2
coldStartTasks/crawler/weixin_video_crawler.py

@@ -160,8 +160,12 @@ class WeixinVideoCrawler(object):
                     if self.is_downloaded(url_unique):
                         continue
 
+                    title = article.get("Title", None)
+                    if not title:
+                        continue
+
                     # 判断标题是否重复
-                    if video_crawler_duplicate_filter(article_url, self.db_client):
+                    if video_crawler_duplicate_filter(title, self.db_client):
                         log(
                             task='weixin_video_crawler',
                             function="insert_msg_list",
@@ -174,7 +178,6 @@ class WeixinVideoCrawler(object):
                         download_path = functions.download_gzh_video(article_url)
                         if download_path:
                             oss_path = functions.upload_to_oss(local_video_path=download_path)
-                            title = article.get("Title", None)
                             position = article.get("ItemIndex", None)
                             cover_url = article.get("CoverImgUrl", None)
                             show_desc = article.get("ShowDesc", None)