_mapper.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
from typing import Dict, List, Optional

from app.core.database import DatabaseManager

from ._const import DecodeTaskConst
  4. class ArticlesDecodeTaskMapper(DecodeTaskConst):
  5. DECODE_TASK_QUEUE = "long_articles_decode_tasks"
  6. INNER_DECODE_CREATE_STATE = "long_articles_inner_decode_create_state"
  7. def __init__(self, pool: DatabaseManager):
  8. self.pool = pool
  9. # 存储解构任务
  10. async def record_decode_task(
  11. self,
  12. task_id: str,
  13. content_id: str,
  14. task_type: int,
  15. payload: str,
  16. remark: str = None,
  17. ) -> int:
  18. query = f"""
  19. INSERT INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  20. VALUES (%s, %s, %s, %s, %s)
  21. """
  22. return await self.pool.async_save(
  23. query=query, params=(task_id, content_id, task_type, payload, remark)
  24. )
  25. async def record_decode_task_if_absent(
  26. self,
  27. task_id: str,
  28. content_id: str,
  29. task_type: int,
  30. payload: str,
  31. remark: str = None,
  32. ) -> int:
  33. query = f"""
  34. INSERT IGNORE INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  35. VALUES (%s, %s, %s, %s, %s)
  36. """
  37. return await self.pool.async_save(
  38. query=query, params=(task_id, content_id, task_type, payload, remark)
  39. )
  40. # 更新解构任务状态
  41. async def update_decode_task_status(
  42. self, task_id: str, ori_status: int, new_status: int, remark: str = None
  43. ) -> int:
  44. query = f"""
  45. UPDATE {self.DECODE_TASK_QUEUE}
  46. SET status = %s, remark = %s
  47. WHERE task_id = %s AND status = %s;
  48. """
  49. return await self.pool.async_save(
  50. query=query, params=(new_status, remark, task_id, ori_status)
  51. )
  52. # 设置解构结果
  53. async def set_decode_result(
  54. self, task_id: str, result: str, remark: str = None
  55. ) -> int:
  56. query = f"""
  57. UPDATE {self.DECODE_TASK_QUEUE}
  58. SET status = %s, remark = %s, result = %s
  59. WHERE task_id = %s AND status = %s;
  60. """
  61. return await self.pool.async_save(
  62. query=query,
  63. params=(
  64. self.TaskStatus.SUCCESS,
  65. remark,
  66. result,
  67. task_id,
  68. self.TaskStatus.PROCESSING,
  69. ),
  70. )
  71. # 获取待拉取结果的解构任务(status=INIT,尚未拿到解构结果)
  72. async def fetch_decoding_tasks(self) -> List[Dict]:
  73. query = f"""
  74. SELECT task_id FROM {self.DECODE_TASK_QUEUE} WHERE status = %s LIMIT %s;
  75. """
  76. return await self.pool.async_fetch(
  77. query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
  78. )
  79. # 获取待解析的任务(获取处理成功的任务)
  80. async def fetch_extract_tasks(self):
  81. query = f"""
  82. SELECT id, result FROM {self.DECODE_TASK_QUEUE}
  83. WHERE extract_status = %s AND status = %s;
  84. """
  85. return await self.pool.async_fetch(
  86. query=query, params=(self.ExtractStatus.INIT, self.TaskStatus.SUCCESS)
  87. )
  88. # 修改解析状态(用于加锁与状态流转)
  89. async def update_extract_status(self, task_id, ori_status, new_status):
  90. query = f"""
  91. UPDATE {self.DECODE_TASK_QUEUE}
  92. SET extract_status = %s WHERE extract_status = %s AND id = %s;
  93. """
  94. return await self.pool.async_save(
  95. query=query,
  96. params=(
  97. new_status,
  98. ori_status,
  99. task_id,
  100. ),
  101. )
  102. # 记录解析结果明细到 long_articles_decode_task_detail
  103. async def record_extract_detail(self, decode_task_id: int, detail: Dict) -> int:
  104. query = """
  105. INSERT INTO long_articles_decode_task_detail
  106. (decode_task_id, inspiration, purpose, key_point, topic)
  107. VALUES (%s, %s, %s, %s, %s);
  108. """
  109. return await self.pool.async_save(
  110. query=query,
  111. params=(
  112. decode_task_id,
  113. detail.get("inspiration", ""),
  114. detail.get("purpose", ""),
  115. detail.get("key_point", ""),
  116. detail.get("topic", ""),
  117. ),
  118. )
  119. # 判断是否存在相同的任务 id
  120. async def fetch_exist_source_id(self, source_id, task_type):
  121. query = f"""
  122. SELECT id FROM {self.DECODE_TASK_QUEUE}
  123. WHERE content_id = %s AND task_type = %s;
  124. """
  125. return await self.pool.async_fetch(query=query, params=(source_id, task_type))
  126. class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  127. def __init__(self, pool: DatabaseManager):
  128. super().__init__(pool)
  129. async def record_decode_task(
  130. self, task_id: str, wx_sn: str, remark: str = None
  131. ) -> int:
  132. return await super().record_decode_task(
  133. task_id=task_id,
  134. content_id=wx_sn,
  135. task_type=self.TaskType.SOURCE_IMAGES_TEXT,
  136. payload=self.AdPlatformDecodeTask.EMPTY_PAYLOAD_JSON,
  137. remark=remark,
  138. )
  139. # 修改文章解构状态
  140. async def update_article_decode_status(
  141. self, id_: int, ori_status: int, new_status: int
  142. ) -> int:
  143. query = """
  144. UPDATE ad_platform_accounts_daily_detail
  145. SET decode_status = %s
  146. WHERE id = %s AND decode_status = %s;
  147. """
  148. return await self.pool.async_save(
  149. query=query, params=(new_status, id_, ori_status)
  150. )
  151. # 获取待解构文章
  152. async def fetch_decode_articles(self) -> List[Dict]:
  153. query = """
  154. SELECT id, account_name, gh_id, article_title, article_cover,
  155. article_text, article_images, wx_sn
  156. FROM ad_platform_accounts_daily_detail WHERE fetch_status = %s AND decode_status = %s
  157. LIMIT %s;
  158. """
  159. return await self.pool.async_fetch(
  160. query=query,
  161. params=(self.TaskStatus.SUCCESS, self.TaskStatus.INIT, self.TASK_BATCH),
  162. )
  163. class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  164. def __init__(self, pool: DatabaseManager):
  165. super().__init__(pool)
  166. # 获取内部文章
  167. async def fetch_inner_articles(self, date_string=None):
  168. if date_string is None:
  169. date_string = self.InnerDecodeCreate.DEFAULT_GOOD_READ_DATE
  170. query = """
  171. SELECT title, source_id, wx_sn, cover_img_url FROM long_articles_good_read_article WHERE dt = %s
  172. ORDER by max_read_rate DESC
  173. ;
  174. """
  175. return await self.pool.async_fetch(query=query, params=(date_string,))
  176. # 获取内部文章生成信息
  177. async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
  178. mod_types = (
  179. self.ProduceModuleType.COVER,
  180. self.ProduceModuleType.IMAGE,
  181. self.ProduceModuleType.TITLE,
  182. self.ProduceModuleType.CONTENT,
  183. self.ProduceModuleType.SUMMARY,
  184. )
  185. placeholders = ",".join(["%s"] * len(mod_types))
  186. query = f"""
  187. SELECT produce_module_type, output
  188. FROM produce_plan_module_output WHERE plan_exe_id = %s
  189. AND produce_module_type in ({placeholders});
  190. """
  191. return await self.pool.async_fetch(
  192. query=query, db_name="aigc", params=(source_id, *mod_types)
  193. )
  194. # 获取文章源信息
  195. async def fetch_article_crawler_source_info(self, source_id: str):
  196. query = """
  197. SELECT
  198. t2.channel_content_id, t3.body_text
  199. FROM produce_plan_exe_record t1
  200. LEFT JOIN produce_plan_exe_refer_content t2 ON t2.plan_exe_id = t1.plan_exe_id
  201. LEFT JOIN crawler_content_blob t3 ON t3.channel_content_id = t2.channel_content_id
  202. WHERE
  203. t1.plan_exe_id = %s;
  204. """
  205. return await self.pool.async_fetch(
  206. query=query, db_name="aigc", params=(source_id,)
  207. )
  208. async def init_create_state(self, source_id: str, task_type: int, now_ts: int):
  209. query = f"""
  210. INSERT IGNORE INTO {self.INNER_DECODE_CREATE_STATE}
  211. (source_id, task_type, status, retry_count, locked_at, created_at, updated_at)
  212. VALUES (%s, %s, %s, %s, %s, %s, %s);
  213. """
  214. return await self.pool.async_save(
  215. query=query,
  216. params=(
  217. source_id,
  218. task_type,
  219. self.TaskStatus.INIT,
  220. self.InnerCreateState.INITIAL_RETRY_COUNT,
  221. self.InnerCreateState.INITIAL_LOCKED_AT,
  222. now_ts,
  223. now_ts,
  224. ),
  225. )
  226. async def fetch_create_state(self, source_id: str, task_type: int):
  227. query = f"""
  228. SELECT source_id, task_type, status, retry_count, locked_at, remote_task_id, last_error
  229. FROM {self.INNER_DECODE_CREATE_STATE}
  230. WHERE source_id = %s AND task_type = %s
  231. LIMIT %s;
  232. """
  233. rows = await self.pool.async_fetch(
  234. query=query,
  235. params=(source_id, task_type, self.InnerCreateState.FETCH_STATE_ROW_LIMIT),
  236. )
  237. if not rows:
  238. return None
  239. return rows[0]
  240. async def acquire_create_lock(
  241. self,
  242. source_id: str,
  243. task_type: int,
  244. now_ts: int,
  245. max_retry_times: int,
  246. lock_expire_before: int,
  247. ):
  248. query = f"""
  249. UPDATE {self.INNER_DECODE_CREATE_STATE}
  250. SET status = %s, locked_at = %s, updated_at = %s, last_error = NULL
  251. WHERE source_id = %s
  252. AND task_type = %s
  253. AND (
  254. status = %s
  255. OR (status = %s AND retry_count < %s)
  256. OR (status = %s AND locked_at > %s AND locked_at < %s)
  257. );
  258. """
  259. return await self.pool.async_save(
  260. query=query,
  261. params=(
  262. self.TaskStatus.PROCESSING,
  263. now_ts,
  264. now_ts,
  265. source_id,
  266. task_type,
  267. self.TaskStatus.INIT,
  268. self.TaskStatus.FAILED,
  269. max_retry_times,
  270. self.TaskStatus.PROCESSING,
  271. self.InnerCreateState.LOCKED_AT_CLEARED,
  272. lock_expire_before,
  273. ),
  274. )
  275. async def mark_create_success(
  276. self,
  277. source_id: str,
  278. task_type: int,
  279. remote_task_id: str,
  280. now_ts: int,
  281. remark: str = None,
  282. ):
  283. query = f"""
  284. UPDATE {self.INNER_DECODE_CREATE_STATE}
  285. SET status = %s,
  286. remote_task_id = %s,
  287. last_error = %s,
  288. locked_at = %s,
  289. updated_at = %s
  290. WHERE source_id = %s AND task_type = %s AND status = %s;
  291. """
  292. return await self.pool.async_save(
  293. query=query,
  294. params=(
  295. self.TaskStatus.SUCCESS,
  296. remote_task_id,
  297. remark,
  298. self.InnerCreateState.LOCKED_AT_CLEARED,
  299. now_ts,
  300. source_id,
  301. task_type,
  302. self.TaskStatus.PROCESSING,
  303. ),
  304. )
  305. async def mark_create_retry(
  306. self,
  307. source_id: str,
  308. task_type: int,
  309. now_ts: int,
  310. error_message: str,
  311. ):
  312. query = f"""
  313. UPDATE {self.INNER_DECODE_CREATE_STATE}
  314. SET status = %s,
  315. retry_count = retry_count + 1,
  316. last_error = %s,
  317. locked_at = %s,
  318. updated_at = %s
  319. WHERE source_id = %s AND task_type = %s AND status = %s;
  320. """
  321. return await self.pool.async_save(
  322. query=query,
  323. params=(
  324. self.TaskStatus.INIT,
  325. error_message,
  326. self.InnerCreateState.LOCKED_AT_CLEARED,
  327. now_ts,
  328. source_id,
  329. task_type,
  330. self.TaskStatus.PROCESSING,
  331. ),
  332. )
  333. async def mark_create_failed(
  334. self,
  335. source_id: str,
  336. task_type: int,
  337. now_ts: int,
  338. error_message: str,
  339. ):
  340. query = f"""
  341. UPDATE {self.INNER_DECODE_CREATE_STATE}
  342. SET status = %s,
  343. retry_count = retry_count + 1,
  344. last_error = %s,
  345. locked_at = %s,
  346. updated_at = %s
  347. WHERE source_id = %s AND task_type = %s AND status = %s;
  348. """
  349. return await self.pool.async_save(
  350. query=query,
  351. params=(
  352. self.TaskStatus.FAILED,
  353. error_message,
  354. self.InnerCreateState.LOCKED_AT_CLEARED,
  355. now_ts,
  356. source_id,
  357. task_type,
  358. self.TaskStatus.PROCESSING,
  359. ),
  360. )
  361. __all__ = [
  362. "ArticlesDecodeTaskMapper",
  363. "AdPlatformArticlesDecodeTaskMapper",
  364. "InnerArticlesDecodeTaskMapper",
  365. ]