Explorar o código

Merge branch '2024-11-12-luojunhui-add-crawler-plan-to-db' of luojunhui/LongArticlesJob into master

luojunhui hai 5 meses
pai
achega
d86b387689
Modificouse 1 ficheiro con 42 adicións e 0 borrados
  1. 42 0
      coldStartTasks/publish/publishCategoryArticles.py

+ 42 - 0
coldStartTasks/publish/publishCategoryArticles.py

@@ -4,6 +4,8 @@
 """
 import datetime
 import json
+import time
+import traceback
 
 from pandas import DataFrame
 
@@ -42,6 +44,36 @@ class CategoryColdStartTask(object):
             }
         )
 
+    def insert_into_db(self, crawler_plan_id, crawler_plan_name, create_timestamp):
+        """
+        插入抓取计划到数据库中
+        :param create_timestamp:
+        :param crawler_plan_id:
+        :param crawler_plan_name:
+        :return:
+        """
+        insert_sql = f"""
+            INSERT INTO article_crawler_plan
+            (crawler_plan_id, name, create_timestamp)
+            values 
+            (%s, %s, %s)
+        """
+        try:
+            self.db_client.update(
+                sql=insert_sql,
+                params=(crawler_plan_id, crawler_plan_name, create_timestamp)
+            )
+        except Exception as e:
+            bot(
+                title="品类冷启任务,记录抓取计划id失败",
+                detail={
+                    "error": str(e),
+                    "error_msg": traceback.format_exc(),
+                    "crawler_plan_id": crawler_plan_id,
+                    "crawler_plan_name": crawler_plan_name
+                }
+            )
+
     def get_articles_from_meta_table(self, category):
         """
         从长文 meta 库中获取冷启文章
@@ -194,6 +226,7 @@ class CategoryColdStartTask(object):
             mention=False
         )
         if url_list:
+            # create_crawler_plan
             crawler_plan_response = aiditApi.auto_create_crawler_task(
                 plan_id=None,
                 plan_name="自动绑定-{}--{}--{}".format(category, datetime.date.today().__str__(), len(url_list)),
@@ -206,6 +239,13 @@ class CategoryColdStartTask(object):
                 message="成功创建抓取计划",
                 data=crawler_plan_response
             )
+
+            # save to db
+            create_timestamp = int(time.time()) * 1000
+            crawler_plan_id = crawler_plan_response['data']['id']
+            crawler_plan_name = crawler_plan_response['data']['name']
+            self.insert_into_db(crawler_plan_id, crawler_plan_name, create_timestamp)
+
             # auto bind to generate plan
             new_crawler_task_list = [
                 {
@@ -229,6 +269,8 @@ class CategoryColdStartTask(object):
                 message="成功绑定到生成计划",
                 data=generate_plan_response
             )
+
+            # change article status
             article_id_list = filter_df['article_id'].values.tolist()
             self.change_article_status_while_publishing(article_id_list=article_id_list)