Bläddra i källkod

头条blogger接口完成

luojunhui 4 månader sedan
förälder
incheckning
bb24467c4c
1 ändrade filer med 14 tillägg och 5 borttagningar
  1. +14 -5
      tasks/crawler_toutiao_account_videos.py

+ 14 - 5
tasks/crawler_toutiao_account_videos.py

@@ -127,7 +127,7 @@ class CrawlerToutiaoAccountVideos:
         video_item.add("out_account_name", video_data["source"])
         video_item.add("publish_timestamp", video_data["publish_time"])
         video_item.add("platform", const.PLATFORM)
-        video_item.add("read_cnt", video_data["read_count"])
+        video_item.add("read_cnt", video_data.get("read_count", 0))
         video_item.add("article_url", url)
         video_item.add("source_account", const.NO_SOURCE_ACCOUNT_STATUS)
         video_item.add("crawler_timestamp", int(time.time()))
@@ -143,7 +143,16 @@ class CrawlerToutiaoAccountVideos:
                     self.db_client, item_with_oss_path
                 )
         except Exception as e:
-            print(e)
+            log(
+                task="crawler_toutiao_account_videos",
+                function="crawler_toutiao_account_videos",
+                message="etl failed",
+                data={
+                    "video_item": video_item.item,
+                    "error": str(e),
+                    "traceback": traceback.format_exc(),
+                }
+            )
 
     def update_account_max_cursor(self, account_id: str) -> None:
         """
@@ -159,9 +168,9 @@ class CrawlerToutiaoAccountVideos:
 
         if max_publish_timestamp:
             update_sql = f"""
-                        update video_meta_accounts
-                        set max_cursor = %s
-                        where account_id = %s and platform = %s;
+                update video_meta_accounts
+                set max_cursor = %s
+                where account_id = %s and platform = %s;
             """
             self.db_client.save(
                 query=update_sql,