|
|
@@ -66,7 +66,7 @@ class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
|
|
|
"wx_sn": wx_sn,
|
|
|
"task": "create_decode_task_v2",
|
|
|
"status": "fail",
|
|
|
- "message": "no response for channel_content_id, rolled back to INIT",
|
|
|
+ "message": "no response for content_id, rolled back to INIT",
|
|
|
}
|
|
|
)
|
|
|
continue
|
|
|
@@ -91,12 +91,14 @@ class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
|
|
|
# 已有解构结果,直接查询结果并落库
|
|
|
query_results = await self.tool.query_decode_results_batch([wx_sn])
|
|
|
result_data = query_results.get(wx_sn)
|
|
|
- if result_data and result_data.get("status") == self.QueryStatus.SUCCESS:
|
|
|
+ if (
|
|
|
+ result_data
|
|
|
+ and result_data.get("status") == self.QueryStatus.SUCCESS
|
|
|
+ ):
|
|
|
data_content = result_data.get("dataContent") or "{}"
|
|
|
html = result_data.get("html")
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=wx_sn,
|
|
|
- content_id=article_id,
|
|
|
+ source_id=wx_sn,
|
|
|
source=self.SourceType.AD_PLATFORM,
|
|
|
payload=json.dumps(
|
|
|
posts_by_wx.get(wx_sn, {}), ensure_ascii=False
|
|
|
@@ -104,7 +106,7 @@ class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
|
|
|
remark="提交时已有解构结果,直接落库",
|
|
|
)
|
|
|
await self.mapper.set_decode_result(
|
|
|
- channel_content_id=wx_sn,
|
|
|
+ source_id=wx_sn,
|
|
|
result=json.dumps(
|
|
|
{"dataContent": data_content, "html": html},
|
|
|
ensure_ascii=False,
|
|
|
@@ -126,13 +128,13 @@ class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
|
|
|
else:
|
|
|
# 提交返回 SUCCESS 但查询不到结果,插入记录等待轮询
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=wx_sn,
|
|
|
- content_id=article_id,
|
|
|
+ source_id=wx_sn,
|
|
|
source=self.SourceType.AD_PLATFORM,
|
|
|
payload=json.dumps(
|
|
|
posts_by_wx.get(wx_sn, {}), ensure_ascii=False
|
|
|
),
|
|
|
remark="提交返回SUCCESS,查询未果,等待轮询",
|
|
|
+ status=self.TaskStatus.PROCESSING,
|
|
|
)
|
|
|
await self.mapper.update_article_decode_status(
|
|
|
article_id,
|
|
|
@@ -150,13 +152,11 @@ class CreateAdPlatformArticlesDecodeTask(DecodeArticleConst):
|
|
|
)
|
|
|
elif status == self.SubmitStatus.PENDING:
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=wx_sn,
|
|
|
- content_id=article_id,
|
|
|
+ source_id=wx_sn,
|
|
|
source=self.SourceType.AD_PLATFORM,
|
|
|
- payload=json.dumps(
|
|
|
- posts_by_wx.get(wx_sn, {}), ensure_ascii=False
|
|
|
- ),
|
|
|
+ payload=json.dumps(posts_by_wx.get(wx_sn, {}), ensure_ascii=False),
|
|
|
remark="任务已提交,等待轮询",
|
|
|
+ status=self.TaskStatus.PROCESSING,
|
|
|
)
|
|
|
await self.mapper.update_article_decode_status(
|
|
|
article_id,
|
|
|
@@ -244,20 +244,18 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
async def _handle_result(
|
|
|
self,
|
|
|
article: Dict,
|
|
|
- channel_content_id: str,
|
|
|
+ source_id: str,
|
|
|
result: Dict,
|
|
|
posts_by_cid: Dict,
|
|
|
config_id: int,
|
|
|
):
|
|
|
- wx_sn = article["wx_sn"]
|
|
|
-
|
|
|
if not result:
|
|
|
await self.log_service.log(
|
|
|
contents={
|
|
|
- "wx_sn": wx_sn,
|
|
|
+ "source_id": source_id,
|
|
|
"task": "create_inner_decode_task",
|
|
|
"status": "fail",
|
|
|
- "message": "no response for channel_content_id",
|
|
|
+ "message": "no response for source_id",
|
|
|
}
|
|
|
)
|
|
|
return
|
|
|
@@ -266,7 +264,7 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
if status == self.SubmitStatus.FAILED:
|
|
|
await self.log_service.log(
|
|
|
contents={
|
|
|
- "wx_sn": wx_sn,
|
|
|
+ "source_id": source_id,
|
|
|
"task": "create_inner_decode_task",
|
|
|
"status": "fail",
|
|
|
"data": result,
|
|
|
@@ -274,48 +272,45 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
)
|
|
|
elif status == self.SubmitStatus.PENDING:
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=channel_content_id,
|
|
|
- content_id=str(article.get("source_id", "")),
|
|
|
+ source_id=source_id,
|
|
|
source=self.SourceType.INNER,
|
|
|
- payload=json.dumps(
|
|
|
- posts_by_cid.get(channel_content_id, {}), ensure_ascii=False
|
|
|
- ),
|
|
|
+ payload=json.dumps(posts_by_cid.get(source_id, {}), ensure_ascii=False),
|
|
|
remark="内部文章解构任务已提交",
|
|
|
+ status=self.TaskStatus.PROCESSING,
|
|
|
)
|
|
|
elif status == self.SubmitStatus.SUCCESS:
|
|
|
query_results = await self.tool.query_decode_results_batch(
|
|
|
- [channel_content_id], config_id=config_id
|
|
|
+ [source_id], config_id=config_id
|
|
|
)
|
|
|
- result_data = query_results.get(channel_content_id)
|
|
|
+ result_data = query_results.get(source_id)
|
|
|
data_content = result_data.get("dataContent") if result_data else None
|
|
|
if data_content:
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=channel_content_id,
|
|
|
- content_id=str(article.get("source_id", "")),
|
|
|
+ source_id=source_id,
|
|
|
source=self.SourceType.INNER,
|
|
|
payload=json.dumps(
|
|
|
- posts_by_cid.get(channel_content_id, {}), ensure_ascii=False
|
|
|
+ posts_by_cid.get(source_id, {}), ensure_ascii=False
|
|
|
),
|
|
|
remark="内部文章解构结果已获取",
|
|
|
)
|
|
|
await self.mapper.set_decode_result(
|
|
|
- channel_content_id=channel_content_id,
|
|
|
+ source_id=source_id,
|
|
|
result=json.dumps(
|
|
|
{"dataContent": data_content}, ensure_ascii=False
|
|
|
),
|
|
|
)
|
|
|
else:
|
|
|
await self.mapper.insert_decode_task(
|
|
|
- channel_content_id=channel_content_id,
|
|
|
- content_id=str(article.get("source_id", "")),
|
|
|
+ source_id=source_id,
|
|
|
source=self.SourceType.INNER,
|
|
|
payload=json.dumps(result, ensure_ascii=False),
|
|
|
remark="提交返回SUCCESS,查询未果,等待轮询",
|
|
|
+ status=self.TaskStatus.PROCESSING,
|
|
|
)
|
|
|
else:
|
|
|
await self.log_service.log(
|
|
|
contents={
|
|
|
- "wx_sn": wx_sn,
|
|
|
+ "source_id": source_id,
|
|
|
"task": "create_inner_decode_task",
|
|
|
"status": "fail",
|
|
|
"message": f"unexpected submit status: {status}",
|
|
|
@@ -329,9 +324,9 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
|
|
|
# 过滤已有任务记录的文章(测试模式跳过)
|
|
|
if not self._TEST_MODE:
|
|
|
- all_wx_sns = [a["wx_sn"] for a in articles]
|
|
|
- existing = await self.mapper.fetch_existing_channel_content_ids(all_wx_sns)
|
|
|
- new_articles = [a for a in articles if a["wx_sn"] not in existing]
|
|
|
+ all_source_ids = [str(a["source_id"]) for a in articles]
|
|
|
+ existing = await self.mapper.fetch_existing_source_ids(all_source_ids)
|
|
|
+ new_articles = [a for a in articles if str(a["source_id"]) not in existing]
|
|
|
skipped = len(articles) - len(new_articles)
|
|
|
if skipped > 0:
|
|
|
await self.log_service.log(
|
|
|
@@ -344,7 +339,9 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
for article in articles:
|
|
|
if article not in new_articles:
|
|
|
await self.mapper.update_inner_article_status(
|
|
|
- article["id"], self.TaskStatus.PROCESSING, self.TaskStatus.SUCCESS
|
|
|
+ article["id"],
|
|
|
+ self.TaskStatus.PROCESSING,
|
|
|
+ self.TaskStatus.SUCCESS,
|
|
|
)
|
|
|
else:
|
|
|
new_articles = articles
|
|
|
@@ -359,7 +356,7 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
produce_info = await self.mapper.fetch_inner_articles_produce_detail(
|
|
|
source_id
|
|
|
)
|
|
|
- produce_info_map[article["wx_sn"]] = produce_info
|
|
|
+ produce_info_map[str(article["source_id"])] = produce_info
|
|
|
|
|
|
posts = self.tool.prepare_posts(new_articles, produce_info_map)
|
|
|
|
|
|
@@ -369,12 +366,12 @@ class CreateInnerArticlesDecodeTask(DecodeArticleConst):
|
|
|
posts_by_cid = {p["channelContentId"]: p for p in posts}
|
|
|
|
|
|
for article in tqdm(new_articles):
|
|
|
- wx_sn = article["wx_sn"]
|
|
|
+ source_id = str(article["source_id"])
|
|
|
article_id = article["id"]
|
|
|
|
|
|
- result = submit_results.get(wx_sn)
|
|
|
+ result = submit_results.get(source_id)
|
|
|
await self._handle_result(
|
|
|
- article, wx_sn, result, posts_by_cid, self.CONFIG_ID
|
|
|
+ article, source_id, result, posts_by_cid, self.CONFIG_ID
|
|
|
)
|
|
|
|
|
|
if not self._TEST_MODE:
|