|
@@ -4,6 +4,8 @@
|
|
|
"""
|
|
|
import datetime
|
|
|
import json
|
|
|
+import time
|
|
|
+import traceback
|
|
|
|
|
|
from pandas import DataFrame
|
|
|
|
|
@@ -42,6 +44,36 @@ class CategoryColdStartTask(object):
|
|
|
}
|
|
|
)
|
|
|
|
|
|
+ def insert_into_db(self, crawler_plan_id, crawler_plan_name, create_timestamp):
|
|
|
+ """
|
|
|
+ Insert one crawler plan record into the article_crawler_plan table.
|
|
|
+ :param create_timestamp: value written to the create_timestamp column (the visible caller passes a millisecond-scale epoch value)
|
|
|
+ :param crawler_plan_id: value written to the crawler_plan_id column
|
|
|
+ :param crawler_plan_name: value written to the name column
|
|
|
+ :return: None; if the insert raises, bot(...) is called with the error, traceback and plan id/name, and the exception is not re-raised
|
|
|
+ """
|
|
|
+ insert_sql = f"""
|
|
|
+ INSERT INTO article_crawler_plan
|
|
|
+ (crawler_plan_id, name, create_timestamp)
|
|
|
+ values
|
|
|
+ (%s, %s, %s)
|
|
|
+ """
|
|
|
+ try:
|
|
|
+ self.db_client.update(
|
|
|
+ sql=insert_sql,
|
|
|
+ params=(crawler_plan_id, crawler_plan_name, create_timestamp)
|
|
|
+ )
|
|
|
+ except Exception as e:
|
|
|
+ bot(
|
|
|
+ title="品类冷启任务,记录抓取计划id失败",
|
|
|
+ detail={
|
|
|
+ "error": str(e),
|
|
|
+ "error_msg": traceback.format_exc(),
|
|
|
+ "crawler_plan_id": crawler_plan_id,
|
|
|
+ "crawler_plan_name": crawler_plan_name
|
|
|
+ }
|
|
|
+ )
|
|
|
+
|
|
|
def get_articles_from_meta_table(self, category):
|
|
|
"""
|
|
|
从长文 meta 库中获取冷启文章
|
|
@@ -194,6 +226,7 @@ class CategoryColdStartTask(object):
|
|
|
mention=False
|
|
|
)
|
|
|
if url_list:
|
|
|
+ # create_crawler_plan
|
|
|
crawler_plan_response = aiditApi.auto_create_crawler_task(
|
|
|
plan_id=None,
|
|
|
plan_name="自动绑定-{}--{}--{}".format(category, datetime.date.today().__str__(), len(url_list)),
|
|
@@ -206,6 +239,13 @@ class CategoryColdStartTask(object):
|
|
|
message="成功创建抓取计划",
|
|
|
data=crawler_plan_response
|
|
|
)
|
|
|
+
|
|
|
+ # save to db
|
|
|
+ create_timestamp = int(time.time()) * 1000
|
|
|
+ crawler_plan_id = crawler_plan_response['data']['id']
|
|
|
+ crawler_plan_name = crawler_plan_response['data']['name']
|
|
|
+ self.insert_into_db(crawler_plan_id, crawler_plan_name, create_timestamp)
|
|
|
+
|
|
|
# auto bind to generate plan
|
|
|
new_crawler_task_list = [
|
|
|
{
|
|
@@ -229,6 +269,8 @@ class CategoryColdStartTask(object):
|
|
|
message="成功绑定到生成计划",
|
|
|
data=generate_plan_response
|
|
|
)
|
|
|
+
|
|
|
+ # change article status
|
|
|
article_id_list = filter_df['article_id'].values.tolist()
|
|
|
self.change_article_status_while_publishing(article_id_list=article_id_list)
|
|
|
|