@@ -38,6 +38,9 @@ class Const:

     BATCH_SIZE = 20

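+    # default number of tasks to process per run when deal() receives no limit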
+    PROCESS_NUM = 1000
+

 class TitleProcess(Const):
     def __init__(self, pool, aliyun_log, trace_id):
@@ -469,7 +471,7 @@ class ArticlePoolCategoryGeneration(TitleProcess):
             ),
         )

-    async def get_task_list(self, limit=1000):
+    async def get_task_list(self, limit):
         query = f"""
             select article_id, title from long_articles.crawler_meta_article
             where category_status = %s and status = %s and score > %s
@@ -564,14 +566,36 @@ class ArticlePoolCategoryGeneration(TitleProcess):
         else:
             return

-    async def deal(self):
-
+    async def deal(self, limit):
         await self._roll_back_lock_tasks(table_name="crawler_meta_article")

-        task_list = await self.get_task_list()
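+        # fall back to the class-level PROCESS_NUM when no limit is supplied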
+        if not limit:
+            limit = self.PROCESS_NUM
+
+        task_list = await self.get_task_list(limit=limit)
         task_batch_list = yield_batch(data=task_list, batch_size=self.BATCH_SIZE)
+        batch_index = 0
         for task_batch in task_batch_list:
-            await self.process_each_batch(task_batch)
+            batch_index += 1
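+            # shield the loop: a failing batch is logged and the remaining batches continue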
+            try:
+                await self.process_each_batch(task_batch)
+
+            except Exception as e:
+                await self.aliyun_log.log(
+                    contents={
+                        "task": "ArticlePoolCategoryGeneration",
+                        "function": "deal",
+                        "message": f"batch {batch_index} processing failed",
+                        "status": "fail",
+                        "trace_id": self.trace_id,
+                        "data": {
+                            "error": str(e),
+                            "traceback": traceback.format_exc(),
+                        },
+                    }
+                )
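Below is a minimal sketch of how a caller might drive the new signature; the `run_category_generation` wrapper and the import path are assumptions, not part of this diff. Passing `limit=None` falls through to `Const.PROCESS_NUM` inside `deal()`.

    import asyncio

    # hypothetical import path; adjust to wherever the class lives in this repo
    from tasks.title_process import ArticlePoolCategoryGeneration

    async def run_category_generation(pool, aliyun_log, trace_id, limit=None):
        # assumes ArticlePoolCategoryGeneration reuses TitleProcess.__init__(pool, aliyun_log, trace_id)
        job = ArticlePoolCategoryGeneration(pool, aliyun_log, trace_id)
        await job.deal(limit=limit)

    # e.g. cap a manual backfill at 200 articles instead of the default 1000:
    # asyncio.run(run_category_generation(pool, aliyun_log, trace_id="manual", limit=200))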