瀏覽代碼

冷启动,将抓取计划id存储到抓取计划表

luojunhui 7 月之前
父節點
當前提交
b33ed17804
共有 1 個文件被更改,包括 37 次插入,0 次删除
  1. 37 0
      coldStartTasks/publish/publishCategoryArticles.py

+ 37 - 0
coldStartTasks/publish/publishCategoryArticles.py

@@ -4,6 +4,8 @@
 """
 """
 import datetime
 import datetime
 import json
 import json
+import time
+import traceback
 
 
 from pandas import DataFrame
 from pandas import DataFrame
 
 
@@ -42,6 +44,36 @@ class CategoryColdStartTask(object):
             }
             }
         )
         )
 
 
+    def insert_into_db(self, crawler_plan_id, crawler_plan_name, create_timestamp):
+        """
+        插入抓取计划到数据库中
+        :param create_timestamp:
+        :param crawler_plan_id:
+        :param crawler_plan_name:
+        :return:
+        """
+        insert_sql = f"""
+            INSERT INTO article_crawler_plan
+            (crawler_plan_id, name, create_timestamp)
+            values 
+            (%s, %s, %s)
+        """
+        try:
+            self.db_client.insert(
+                sql=insert_sql,
+                params=(crawler_plan_id, crawler_plan_name, create_timestamp)
+            )
+        except Exception as e:
+            bot(
+                title="品类冷启任务,记录抓取计划id失败",
+                detail={
+                    "error": str(e),
+                    "error_msg": traceback.format_exc(),
+                    "crawler_plan_id": crawler_plan_id,
+                    "crawler_plan_name": crawler_plan_name
+                }
+            )
+
     def get_articles_from_meta_table(self, category):
     def get_articles_from_meta_table(self, category):
         """
         """
         从长文 meta 库中获取冷启文章
         从长文 meta 库中获取冷启文章
@@ -207,6 +239,11 @@ class CategoryColdStartTask(object):
                 data=crawler_plan_response
                 data=crawler_plan_response
             )
             )
             # auto bind to generate plan
             # auto bind to generate plan
+            create_timestamp = int(time.time()) * 1000
+            crawler_plan_id = crawler_plan_response['data']['id']
+            crawler_plan_name = crawler_plan_response['data']['name']
+            self.insert_into_db(crawler_plan_id, crawler_plan_name, create_timestamp)
+
             new_crawler_task_list = [
             new_crawler_task_list = [
                 {
                 {
                     "contentType": 1,
                     "contentType": 1,