|
|
@@ -6,25 +6,39 @@ from ._const import DecodeTaskConst
|
|
|
|
|
|
|
|
|
class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
+ DECODE_TASK_QUEUE = "long_articles_new_decode_tasks"
|
|
|
+ INNER_DECODE_CREATE_STATE = "long_articles_inner_decode_create_state"
|
|
|
+
|
|
|
def __init__(self, pool: DatabaseManager):
    """Keep a reference to the database manager.

    Args:
        pool: Object stored as ``self.pool``; the query methods of this
            mapper call ``async_save`` / ``async_fetch`` on it.
    """
    self.pool = pool
|
|
|
|
|
|
# Store a decode task.
async def record_decode_task(
    self,
    task_id: str,
    content_id: str,
    task_type: int,
    payload: str,
    remark: str = None,
) -> int:
    """Insert one row into the table named by ``DECODE_TASK_QUEUE``.

    Args:
        task_id: Value for the ``task_id`` column.
        content_id: Value for the ``content_id`` column.
        task_type: Value for the ``task_type`` column.
        payload: Value for the ``payload`` column.
        remark: Optional value for the ``remark`` column.

    Returns:
        The result of ``self.pool.async_save`` (annotated ``int``;
        presumably the affected-row count -- TODO confirm against
        DatabaseManager).
    """
    # The table name comes from a class constant, never from caller input;
    # all row values travel as bound parameters.
    sql = f"""
        INSERT INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
        VALUES (%s, %s, %s, %s, %s)
    """
    row_values = (task_id, content_id, task_type, payload, remark)
    return await self.pool.async_save(query=sql, params=row_values)
|
|
|
+
|
|
|
async def record_decode_task_if_absent(
    self,
    task_id: str,
    content_id: str,
    task_type: int,
    payload: str,
    remark: str = None,
) -> int:
    """Insert a decode task with ``INSERT IGNORE``.

    Same columns as ``record_decode_task``, but the statement uses
    ``INSERT IGNORE``.
    NOTE(review): which unique key makes a row count as "already present"
    is defined by the table schema, which is not visible here -- confirm.

    Args:
        task_id: Value for the ``task_id`` column.
        content_id: Value for the ``content_id`` column.
        task_type: Value for the ``task_type`` column.
        payload: Value for the ``payload`` column.
        remark: Optional value for the ``remark`` column.

    Returns:
        The result of ``self.pool.async_save`` (annotated ``int``).
    """
    sql = f"""
        INSERT IGNORE INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
        VALUES (%s, %s, %s, %s, %s)
    """
    row_values = (task_id, content_id, task_type, payload, remark)
    return await self.pool.async_save(query=sql, params=row_values)
|
|
|
|
|
|
# 更新解构任务状态
|
|
|
async def update_decode_task_status(
|
|
|
self, task_id: str, ori_status: int, new_status: int, remark: str = None
|
|
|
) -> int:
|
|
|
- query = """
|
|
|
- UPDATE long_articles_decode_tasks
|
|
|
+ query = f"""
|
|
|
+ UPDATE {self.DECODE_TASK_QUEUE}
|
|
|
SET status = %s, remark = %s
|
|
|
WHERE task_id = %s AND status = %s;
|
|
|
"""
|
|
|
@@ -36,8 +50,8 @@ class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
async def set_decode_result(
|
|
|
self, task_id: str, result: str, remark: str = None
|
|
|
) -> int:
|
|
|
- query = """
|
|
|
- UPDATE long_articles_decode_tasks
|
|
|
+ query = f"""
|
|
|
+ UPDATE {self.DECODE_TASK_QUEUE}
|
|
|
SET status = %s, remark = %s, result = %s
|
|
|
WHERE task_id = %s AND status = %s;
|
|
|
"""
|
|
|
@@ -54,8 +68,8 @@ class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
|
|
|
# 获取待拉取结果的解构任务(status=INIT,尚未拿到解构结果)
|
|
|
async def fetch_decoding_tasks(self) -> List[Dict]:
|
|
|
- query = """
|
|
|
- SELECT task_id FROM long_articles_decode_tasks WHERE status = %s LIMIT %s;
|
|
|
+ query = f"""
|
|
|
+ SELECT task_id FROM {self.DECODE_TASK_QUEUE} WHERE status = %s LIMIT %s;
|
|
|
"""
|
|
|
return await self.pool.async_fetch(
|
|
|
query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
|
|
|
@@ -63,8 +77,8 @@ class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
|
|
|
# 获取待解析的任务(获取处理成功的任务)
|
|
|
async def fetch_extract_tasks(self):
|
|
|
- query = """
|
|
|
- SELECT id, result FROM long_articles_decode_tasks
|
|
|
+ query = f"""
|
|
|
+ SELECT id, result FROM {self.DECODE_TASK_QUEUE}
|
|
|
WHERE extract_status = %s AND status = %s;
|
|
|
"""
|
|
|
return await self.pool.async_fetch(
|
|
|
@@ -73,8 +87,8 @@ class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
|
|
|
# 修改解析状态(用于加锁与状态流转)
|
|
|
async def update_extract_status(self, task_id, ori_status, new_status):
|
|
|
- query = """
|
|
|
- UPDATE long_articles_decode_tasks
|
|
|
+ query = f"""
|
|
|
+ UPDATE {self.DECODE_TASK_QUEUE}
|
|
|
SET extract_status = %s WHERE extract_status = %s AND id = %s;
|
|
|
"""
|
|
|
return await self.pool.async_save(
|
|
|
@@ -104,11 +118,32 @@ class ArticlesDecodeTaskMapper(DecodeTaskConst):
|
|
|
),
|
|
|
)
|
|
|
|
|
|
# Check whether a task with the same id already exists.
async def fetch_exist_source_id(self, source_id, task_type):
    """Look up queue rows for a given content id and task type.

    Args:
        source_id: Compared against the ``content_id`` column.
        task_type: Compared against the ``task_type`` column.

    Returns:
        The result of ``self.pool.async_fetch``; the query selects only the
        ``id`` column of matching rows.
    """
    sql = f"""
        SELECT id FROM {self.DECODE_TASK_QUEUE}
        WHERE content_id = %s AND task_type = %s;
    """
    lookup_key = (source_id, task_type)
    return await self.pool.async_fetch(query=sql, params=lookup_key)
|
|
|
+
|
|
|
|
|
|
class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
|
|
|
def __init__(self, pool: DatabaseManager):
    """Initialise through the parent constructor with the given ``pool``."""
    super().__init__(pool)
|
|
|
|
|
|
async def record_decode_task(self, task_id: str, wx_sn: str, remark: str = None) -> int:
    """Record a decode task keyed by ``wx_sn``.

    Delegates to the parent ``record_decode_task``, passing ``wx_sn`` as
    ``content_id``, ``TaskType.SOURCE_IMAGES_TEXT`` as the task type and
    the literal ``"{}"`` as the payload.

    Args:
        task_id: Forwarded unchanged.
        wx_sn: Forwarded as ``content_id``.
        remark: Forwarded unchanged.

    Returns:
        Whatever the parent implementation returns.
    """
    fixed_task_type = self.TaskType.SOURCE_IMAGES_TEXT
    empty_payload = "{}"
    return await super().record_decode_task(
        task_id=task_id,
        content_id=wx_sn,
        task_type=fixed_task_type,
        payload=empty_payload,
        remark=remark,
    )
|
|
|
+
|
|
|
# 修改文章解构状态
|
|
|
async def update_article_decode_status(
|
|
|
self, id_: int, ori_status: int, new_status: int
|
|
|
@@ -141,24 +176,15 @@ class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
|
|
|
super().__init__(pool)
|
|
|
|
|
|
# Fetch internal articles.
async def fetch_inner_articles(self, date_string="20260401", limit=20):
    """Fetch rows of ``long_articles_good_read_article`` for one ``dt`` value.

    The query keeps only the four source ids listed in the SQL, orders the
    rows by ``max_read_rate`` descending and caps the row count.

    Args:
        date_string: Compared against the ``dt`` column.
            NOTE(review): the default is a fixed date literal; callers
            most likely want to pass the date explicitly -- confirm.
        limit: Maximum number of rows to return. Defaults to 20, the value
            that was previously hard-coded in the parameter tuple.

    Returns:
        The result of ``self.pool.async_fetch``; each row carries
        ``title``, ``source_id``, ``wx_sn`` and ``cover_img_url``.
    """
    # NOTE(review): the source_id whitelist below is hard-coded in the SQL;
    # it is left untouched because its intent cannot be judged from here.
    sql = """
        SELECT title, source_id, wx_sn, cover_img_url FROM long_articles_good_read_article WHERE dt = %s
        AND source_id IN ('20260222161421405412536', '20241011051312526697231', '20241125113847098601958', '20241011042712648349812')
        ORDER by max_read_rate DESC LIMIT %s;
    """
    return await self.pool.async_fetch(query=sql, params=(date_string, limit))
|
|
|
|
|
|
# 获取内部文章生成信息
|
|
|
async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
|
|
|
@@ -171,6 +197,173 @@ class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
|
|
|
query=query, db_name="aigc", params=(source_id,)
|
|
|
)
|
|
|
|
|
|
# Fetch the article's source information.
async def fetch_article_crawler_source_info(self, source_id: str):
    """Fetch referenced crawler content for one produce-plan execution.

    Starts from ``produce_plan_exe_record`` and LEFT JOINs
    ``produce_plan_exe_refer_content`` and ``crawler_content_blob``, so a
    matching execution row yields results even when the joined columns are
    NULL.

    Args:
        source_id: Compared against ``produce_plan_exe_record.plan_exe_id``.

    Returns:
        The result of ``self.pool.async_fetch`` run with
        ``db_name="aigc"``; each row carries ``channel_content_id`` and
        ``body_text``.
    """
    sql = """
        SELECT
            t2.channel_content_id, t3.body_text
        FROM produce_plan_exe_record t1
        LEFT JOIN produce_plan_exe_refer_content t2 ON t2.plan_exe_id = t1.plan_exe_id
        LEFT JOIN crawler_content_blob t3 ON t3.channel_content_id = t2.channel_content_id
        WHERE
            t1.plan_exe_id = %s;
    """
    bound = (source_id,)
    return await self.pool.async_fetch(query=sql, db_name="aigc", params=bound)
|
|
|
+
|
|
|
async def init_create_state(self, source_id: str, task_type: int, now_ts: int):
    """Insert the initial create-state row with ``INSERT IGNORE``.

    The row starts with status ``TaskStatus.INIT``, ``retry_count`` 0 and
    ``locked_at`` 0; ``created_at`` and ``updated_at`` are both ``now_ts``.
    NOTE(review): the key that makes INSERT IGNORE skip an existing row is
    defined by the table schema, which is not visible here -- confirm.

    Args:
        source_id: Value for the ``source_id`` column.
        task_type: Value for the ``task_type`` column.
        now_ts: Timestamp written to ``created_at`` and ``updated_at``.

    Returns:
        The result of ``self.pool.async_save``.
    """
    sql = f"""
        INSERT IGNORE INTO {self.INNER_DECODE_CREATE_STATE}
        (source_id, task_type, status, retry_count, locked_at, created_at, updated_at)
        VALUES (%s, %s, %s, %s, %s, %s, %s);
    """
    initial_retry_count = 0
    unlocked = 0
    row_values = (
        source_id,
        task_type,
        self.TaskStatus.INIT,
        initial_retry_count,
        unlocked,
        now_ts,  # created_at
        now_ts,  # updated_at
    )
    return await self.pool.async_save(query=sql, params=row_values)
|
|
|
+
|
|
|
async def fetch_create_state(self, source_id: str, task_type: int):
    """Return the create-state row for ``(source_id, task_type)``, if any.

    Args:
        source_id: Compared against the ``source_id`` column.
        task_type: Compared against the ``task_type`` column.

    Returns:
        The first row returned by ``self.pool.async_fetch`` (the query is
        limited to one row), or ``None`` when the fetch result is empty.
    """
    sql = f"""
        SELECT source_id, task_type, status, retry_count, locked_at, remote_task_id, last_error
        FROM {self.INNER_DECODE_CREATE_STATE}
        WHERE source_id = %s AND task_type = %s
        LIMIT 1;
    """
    matches = await self.pool.async_fetch(query=sql, params=(source_id, task_type))
    return matches[0] if matches else None
|
|
|
+
|
|
|
async def acquire_create_lock(
    self,
    source_id: str,
    task_type: int,
    now_ts: int,
    max_retry_times: int,
    lock_expire_before: int,
):
    """Conditionally move a create-state row to ``PROCESSING``.

    The UPDATE targets the ``(source_id, task_type)`` row and only matches
    when one of the following holds:

    * status is ``INIT``;
    * status is ``FAILED`` and ``retry_count`` is below ``max_retry_times``;
    * status is ``PROCESSING`` and ``locked_at`` is greater than 0 and less
      than ``lock_expire_before``.

    On a match it sets status to ``PROCESSING``, writes ``now_ts`` to
    ``locked_at`` and ``updated_at``, and clears ``last_error``.

    Args:
        source_id: Row key, ``source_id`` column.
        task_type: Row key, ``task_type`` column.
        now_ts: Timestamp written to ``locked_at`` and ``updated_at``.
        max_retry_times: Upper bound (exclusive) on ``retry_count`` for
            re-taking a ``FAILED`` row.
        lock_expire_before: Threshold compared against ``locked_at`` for
            re-taking a ``PROCESSING`` row.

    Returns:
        The result of ``self.pool.async_save`` (presumably the number of
        rows updated, i.e. whether the lock was taken -- TODO confirm).
    """
    status = self.TaskStatus
    sql = f"""
        UPDATE {self.INNER_DECODE_CREATE_STATE}
        SET status = %s, locked_at = %s, updated_at = %s, last_error = NULL
        WHERE source_id = %s
          AND task_type = %s
          AND (
                status = %s
                OR (status = %s AND retry_count < %s)
                OR (status = %s AND locked_at > 0 AND locked_at < %s)
          );
    """
    # Parameters are grouped in the order the placeholders appear.
    new_values = (status.PROCESSING, now_ts, now_ts)
    row_key = (source_id, task_type)
    eligibility = (
        status.INIT,
        status.FAILED,
        max_retry_times,
        status.PROCESSING,
        lock_expire_before,
    )
    return await self.pool.async_save(
        query=sql, params=new_values + row_key + eligibility
    )
|
|
|
+
|
|
|
async def mark_create_success(
    self,
    source_id: str,
    task_type: int,
    remote_task_id: str,
    now_ts: int,
    remark: str = None,
):
    """Move a ``PROCESSING`` create-state row to ``SUCCESS``.

    Also stores ``remote_task_id``, writes ``remark`` into the
    ``last_error`` column, resets ``locked_at`` to 0 and sets
    ``updated_at`` to ``now_ts``. Rows whose status is not ``PROCESSING``
    are not matched.

    Args:
        source_id: Row key, ``source_id`` column.
        task_type: Row key, ``task_type`` column.
        remote_task_id: Value for the ``remote_task_id`` column.
        now_ts: Timestamp written to ``updated_at``.
        remark: Optional text written to ``last_error``.

    Returns:
        The result of ``self.pool.async_save``.
    """
    status = self.TaskStatus
    sql = f"""
        UPDATE {self.INNER_DECODE_CREATE_STATE}
        SET status = %s,
            remote_task_id = %s,
            last_error = %s,
            locked_at = 0,
            updated_at = %s
        WHERE source_id = %s AND task_type = %s AND status = %s;
    """
    new_values = (status.SUCCESS, remote_task_id, remark, now_ts)
    guard = (source_id, task_type, status.PROCESSING)
    return await self.pool.async_save(query=sql, params=new_values + guard)
|
|
|
+
|
|
|
async def mark_create_retry(
    self,
    source_id: str,
    task_type: int,
    now_ts: int,
    error_message: str,
):
    """Return a ``PROCESSING`` create-state row to ``INIT`` after an error.

    Increments ``retry_count``, stores ``error_message`` in ``last_error``,
    resets ``locked_at`` to 0 and sets ``updated_at`` to ``now_ts``. Rows
    whose status is not ``PROCESSING`` are not matched.

    Args:
        source_id: Row key, ``source_id`` column.
        task_type: Row key, ``task_type`` column.
        now_ts: Timestamp written to ``updated_at``.
        error_message: Text written to ``last_error``.

    Returns:
        The result of ``self.pool.async_save``.
    """
    status = self.TaskStatus
    sql = f"""
        UPDATE {self.INNER_DECODE_CREATE_STATE}
        SET status = %s,
            retry_count = retry_count + 1,
            last_error = %s,
            locked_at = 0,
            updated_at = %s
        WHERE source_id = %s AND task_type = %s AND status = %s;
    """
    new_values = (status.INIT, error_message, now_ts)
    guard = (source_id, task_type, status.PROCESSING)
    return await self.pool.async_save(query=sql, params=new_values + guard)
|
|
|
+
|
|
|
async def mark_create_failed(
    self,
    source_id: str,
    task_type: int,
    now_ts: int,
    error_message: str,
):
    """Move a ``PROCESSING`` create-state row to ``FAILED``.

    Increments ``retry_count``, stores ``error_message`` in ``last_error``,
    resets ``locked_at`` to 0 and sets ``updated_at`` to ``now_ts``. Rows
    whose status is not ``PROCESSING`` are not matched.

    Args:
        source_id: Row key, ``source_id`` column.
        task_type: Row key, ``task_type`` column.
        now_ts: Timestamp written to ``updated_at``.
        error_message: Text written to ``last_error``.

    Returns:
        The result of ``self.pool.async_save``.
    """
    status = self.TaskStatus
    sql = f"""
        UPDATE {self.INNER_DECODE_CREATE_STATE}
        SET status = %s,
            retry_count = retry_count + 1,
            last_error = %s,
            locked_at = 0,
            updated_at = %s
        WHERE source_id = %s AND task_type = %s AND status = %s;
    """
    new_values = (status.FAILED, error_message, now_ts)
    guard = (source_id, task_type, status.PROCESSING)
    return await self.pool.async_save(query=sql, params=new_values + guard)
|
|
|
+
|
|
|
|
|
|
__all__ = [
|
|
|
"ArticlesDecodeTaskMapper",
|