|
@@ -127,7 +127,7 @@ class CrawlerToutiaoAccountVideos:
|
|
|
video_item.add("out_account_name", video_data["source"])
|
|
|
video_item.add("publish_timestamp", video_data["publish_time"])
|
|
|
video_item.add("platform", const.PLATFORM)
|
|
|
- video_item.add("read_cnt", video_data["read_count"])
|
|
|
+ video_item.add("read_cnt", video_data.get("read_count", 0))
|
|
|
video_item.add("article_url", url)
|
|
|
video_item.add("source_account", const.NO_SOURCE_ACCOUNT_STATUS)
|
|
|
video_item.add("crawler_timestamp", int(time.time()))
|
|
@@ -143,7 +143,16 @@ class CrawlerToutiaoAccountVideos:
|
|
|
self.db_client, item_with_oss_path
|
|
|
)
|
|
|
except Exception as e:
|
|
|
- print(e)
|
|
|
+ log(
|
|
|
+ task="crawler_toutiao_account_videos",
|
|
|
+ function="crawler_toutiao_account_videos",
|
|
|
+ message="etl failed",
|
|
|
+ data={
|
|
|
+ "video_item": video_item.item,
|
|
|
+ "error": str(e),
|
|
|
+ "traceback": traceback.format_exc(),
|
|
|
+ }
|
|
|
+ )
|
|
|
|
|
|
def update_account_max_cursor(self, account_id: str) -> None:
|
|
|
"""
|
|
@@ -159,9 +168,9 @@ class CrawlerToutiaoAccountVideos:
|
|
|
|
|
|
if max_publish_timestamp:
|
|
|
update_sql = f"""
|
|
|
- update video_meta_accounts
|
|
|
- set max_cursor = %s
|
|
|
- where account_id = %s and platform = %s;
|
|
|
+ update video_meta_accounts
|
|
|
+ set max_cursor = %s
|
|
|
+ where account_id = %s and platform = %s;
|
|
|
"""
|
|
|
self.db_client.save(
|
|
|
query=update_sql,
|