Переглянути джерело

develop baidu video downloader

luojunhui 8 місяців тому
батько
коміт
71161bf78e
1 змінених файлів з 23 додано та 2 видалено
  1. 23 2
      coldStartTasks/crawler/baidu/video_crawler.py

+ 23 - 2
coldStartTasks/crawler/baidu/video_crawler.py

@@ -41,7 +41,7 @@ class BaiduVideoCrawler(object):
         status = 1 表示正常抓取的账号
         """
         sql = f"""
-            select account_id, account_name, latest_crawler_timestamp as max_cursor 
+            select account_id, account_name, max_cursor 
             from baidu_account_for_videos
             where status = 1;
         """
@@ -191,10 +191,31 @@ class BaiduVideoCrawler(object):
             print(e)
             return
 
+    def update_cursor(self, account_id):
+        """
+        Set max_cursor for account_id from its newest stored publish_timestamp;
+        does nothing when the account has no rows yet (max() yields NULL).
+        """
+        select_sql = f"""
+            select max(publish_timestamp) as max_publish_timestamp from publish_single_video_source where out_account_id = '{account_id}';
+        """
+        response_mysql = self.db.fetch(query=select_sql)
+        max_publish_timestamp = response_mysql[0][0] if response_mysql else None
+        if max_publish_timestamp is None:
+            return
+        update_sql = """
+            update baidu_account_for_videos
+            set max_cursor = %s
+            where account_id = %s;
+        """
+        # NOTE(review): the cursor is stored as publish_timestamp * 10000 — confirm the unit.
+        self.db.save(query=update_sql, params=(max_publish_timestamp * 10000, account_id))
+
     def deal(self):
         """
         deal
         """
         account_list = self.get_account_list()
-        for account in account_list[8:]:
+        for account in account_list:
             self.crawler_each_account(account)
+            self.update_cursor(account['account_id'])