Browse Source

视频号抓取任务

luojunhui 4 months ago
parent
commit
65837ea254
1 changed file with 9 additions and 0 deletions
  1. 9 0
      tasks/crawler_channel_account_videos.py

+ 9 - 0
tasks/crawler_channel_account_videos.py

@@ -63,6 +63,14 @@ class CrawlerChannelAccountVideos:
         if self.whether_video_exists(title):
             return
 
+        if not title:
+            return
+
+        if len(title) < 10:
+            bad_status = 4
+        else:
+            bad_status = 0
+
         video_item = Item()
         video_id = video["id"]
         video_item.add("content_trace_id", "video{}".format(str_to_md5(str(video_id))))
@@ -72,6 +80,7 @@ class CrawlerChannelAccountVideos:
         video_item.add("out_account_name", video["nickname"])
         video_item.add("publish_timestamp", video["createtime"])
         video_item.add("platform", 'sph')
+        video_item.add("bad_status", bad_status)
         media = object_desc["media"][0]
         url = media["Url"]
         decode_key = media["decodeKey"]