"""Database mappers for the long-article decode task pipeline."""
from typing import Dict, List, Optional

from app.core.database import DatabaseManager

from ._const import DecodeTaskConst
  4. class ArticlesDecodeTaskMapper(DecodeTaskConst):
  5. DECODE_TASK_QUEUE = "long_articles_decode_task_detail"
  6. INNER_DECODE_CREATE_STATE = "long_articles_inner_decode_create_state"
  7. def __init__(self, pool: DatabaseManager):
  8. self.pool = pool
  9. # 存储解构任务
  10. async def record_decode_task(
  11. self,
  12. task_id: str,
  13. content_id: str,
  14. task_type: int,
  15. payload: str,
  16. remark: str = None,
  17. ) -> int:
  18. query = f"""
  19. INSERT INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  20. VALUES (%s, %s, %s, %s, %s)
  21. """
  22. return await self.pool.async_save(
  23. query=query, params=(task_id, content_id, task_type, payload, remark)
  24. )
  25. async def record_decode_task_if_absent(
  26. self,
  27. task_id: str,
  28. content_id: str,
  29. task_type: int,
  30. payload: str,
  31. remark: str = None,
  32. ) -> int:
  33. query = f"""
  34. INSERT IGNORE INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  35. VALUES (%s, %s, %s, %s, %s)
  36. """
  37. return await self.pool.async_save(
  38. query=query, params=(task_id, content_id, task_type, payload, remark)
  39. )
  40. # 更新解构任务状态
  41. async def update_decode_task_status(
  42. self, task_id: str, ori_status: int, new_status: int, remark: str = None
  43. ) -> int:
  44. query = f"""
  45. UPDATE {self.DECODE_TASK_QUEUE}
  46. SET status = %s, remark = %s
  47. WHERE task_id = %s AND status = %s;
  48. """
  49. return await self.pool.async_save(
  50. query=query, params=(new_status, remark, task_id, ori_status)
  51. )
  52. # 设置解构结果
  53. async def set_decode_result(
  54. self, task_id: str, result: str, remark: str = None
  55. ) -> int:
  56. query = f"""
  57. UPDATE {self.DECODE_TASK_QUEUE}
  58. SET status = %s, remark = %s, result = %s
  59. WHERE task_id = %s AND status = %s;
  60. """
  61. return await self.pool.async_save(
  62. query=query,
  63. params=(
  64. self.TaskStatus.SUCCESS,
  65. remark,
  66. result,
  67. task_id,
  68. self.TaskStatus.PROCESSING,
  69. ),
  70. )
  71. # 获取待拉取结果的解构任务(status=INIT,尚未拿到解构结果)
  72. async def fetch_decoding_tasks(self) -> List[Dict]:
  73. query = f"""
  74. SELECT task_id FROM {self.DECODE_TASK_QUEUE} WHERE status = %s LIMIT %s;
  75. """
  76. return await self.pool.async_fetch(
  77. query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
  78. )
  79. # 获取待解析的任务(获取处理成功的任务)
  80. async def fetch_extract_tasks(self):
  81. query = f"""
  82. SELECT id, result FROM {self.DECODE_TASK_QUEUE}
  83. WHERE extract_status = %s AND status = %s;
  84. """
  85. return await self.pool.async_fetch(
  86. query=query, params=(self.ExtractStatus.INIT, self.TaskStatus.SUCCESS)
  87. )
  88. # 修改解析状态(用于加锁与状态流转)
  89. async def update_extract_status(self, task_id, ori_status, new_status):
  90. query = f"""
  91. UPDATE {self.DECODE_TASK_QUEUE}
  92. SET extract_status = %s WHERE extract_status = %s AND id = %s;
  93. """
  94. return await self.pool.async_save(
  95. query=query,
  96. params=(
  97. new_status,
  98. ori_status,
  99. task_id,
  100. ),
  101. )
  102. # 记录解析结果明细到 long_articles_decode_task_detail
  103. async def record_extract_detail(self, decode_task_id: int, detail: Dict) -> int:
  104. query = """
  105. INSERT INTO long_articles_decode_task_detail
  106. (decode_task_id, inspiration, purpose, key_point, topic)
  107. VALUES (%s, %s, %s, %s, %s);
  108. """
  109. return await self.pool.async_save(
  110. query=query,
  111. params=(
  112. decode_task_id,
  113. detail.get("inspiration", ""),
  114. detail.get("purpose", ""),
  115. detail.get("key_point", ""),
  116. detail.get("topic", ""),
  117. ),
  118. )
  119. # 判断是否存在相同的任务 id
  120. async def fetch_exist_source_id(self, source_id, task_type):
  121. query = f"""
  122. SELECT id FROM {self.DECODE_TASK_QUEUE}
  123. WHERE content_id = %s AND task_type = %s;
  124. """
  125. return await self.pool.async_fetch(query=query, params=(source_id, task_type))
  126. class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  127. def __init__(self, pool: DatabaseManager):
  128. super().__init__(pool)
  129. async def record_decode_task(
  130. self, task_id: str, wx_sn: str, remark: str = None
  131. ) -> int:
  132. return await super().record_decode_task(
  133. task_id=task_id,
  134. content_id=wx_sn,
  135. task_type=self.TaskType.SOURCE_IMAGES_TEXT,
  136. payload=self.AdPlatformDecodeTask.EMPTY_PAYLOAD_JSON,
  137. remark=remark,
  138. )
  139. # 修改文章解构状态
  140. async def update_article_decode_status(
  141. self, id_: int, ori_status: int, new_status: int
  142. ) -> int:
  143. query = """
  144. UPDATE ad_platform_accounts_daily_detail
  145. SET decode_status = %s
  146. WHERE id = %s AND decode_status = %s;
  147. """
  148. return await self.pool.async_save(
  149. query=query, params=(new_status, id_, ori_status)
  150. )
  151. # 获取待解构文章
  152. async def fetch_decode_articles(self) -> List[Dict]:
  153. query = """
  154. SELECT id, account_name, gh_id, article_title, article_cover,
  155. article_text, article_images, wx_sn
  156. FROM ad_platform_accounts_daily_detail WHERE fetch_status = %s AND decode_status = %s
  157. LIMIT %s;
  158. """
  159. return await self.pool.async_fetch(
  160. query=query,
  161. params=(self.TaskStatus.SUCCESS, self.TaskStatus.INIT, self.TASK_BATCH),
  162. )
  163. class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  164. def __init__(self, pool: DatabaseManager):
  165. super().__init__(pool)
  166. # 获取内部文章
  167. async def fetch_inner_articles(self, date_string=None):
  168. if date_string is None:
  169. date_string = self.InnerDecodeCreate.DEFAULT_GOOD_READ_DATE
  170. query = """
  171. SELECT title, source_id, wx_sn, cover_img_url FROM long_articles_good_read_article WHERE dt = %s
  172. ORDER by max_read_rate DESC;
  173. """
  174. return await self.pool.async_fetch(query=query, params=(date_string,))
  175. # 获取内部文章生成信息
  176. async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
  177. mod_types = (
  178. self.ProduceModuleType.COVER,
  179. self.ProduceModuleType.IMAGE,
  180. self.ProduceModuleType.TITLE,
  181. self.ProduceModuleType.CONTENT,
  182. self.ProduceModuleType.SUMMARY,
  183. )
  184. placeholders = ",".join(["%s"] * len(mod_types))
  185. query = f"""
  186. SELECT produce_module_type, output
  187. FROM produce_plan_module_output WHERE plan_exe_id = %s
  188. AND produce_module_type in ({placeholders});
  189. """
  190. return await self.pool.async_fetch(
  191. query=query, db_name="aigc", params=(source_id, *mod_types)
  192. )
  193. # 获取文章源信息
  194. async def fetch_article_crawler_source_info(self, source_id: str):
  195. query = """
  196. SELECT
  197. t2.channel_content_id, t3.body_text
  198. FROM produce_plan_exe_record t1
  199. LEFT JOIN produce_plan_exe_refer_content t2 ON t2.plan_exe_id = t1.plan_exe_id
  200. LEFT JOIN crawler_content_blob t3 ON t3.channel_content_id = t2.channel_content_id
  201. WHERE
  202. t1.plan_exe_id = %s;
  203. """
  204. return await self.pool.async_fetch(
  205. query=query, db_name="aigc", params=(source_id,)
  206. )
  207. async def init_create_state(self, source_id: str, task_type: int, now_ts: int):
  208. query = f"""
  209. INSERT IGNORE INTO {self.INNER_DECODE_CREATE_STATE}
  210. (source_id, task_type, status, retry_count, locked_at, created_at, updated_at)
  211. VALUES (%s, %s, %s, %s, %s, %s, %s);
  212. """
  213. return await self.pool.async_save(
  214. query=query,
  215. params=(
  216. source_id,
  217. task_type,
  218. self.TaskStatus.INIT,
  219. self.InnerCreateState.INITIAL_RETRY_COUNT,
  220. self.InnerCreateState.INITIAL_LOCKED_AT,
  221. now_ts,
  222. now_ts,
  223. ),
  224. )
  225. async def fetch_create_state(self, source_id: str, task_type: int):
  226. query = f"""
  227. SELECT source_id, task_type, status, retry_count, locked_at, remote_task_id, last_error
  228. FROM {self.INNER_DECODE_CREATE_STATE}
  229. WHERE source_id = %s AND task_type = %s
  230. LIMIT %s;
  231. """
  232. rows = await self.pool.async_fetch(
  233. query=query,
  234. params=(source_id, task_type, self.InnerCreateState.FETCH_STATE_ROW_LIMIT),
  235. )
  236. if not rows:
  237. return None
  238. return rows[0]
  239. async def acquire_create_lock(
  240. self,
  241. source_id: str,
  242. task_type: int,
  243. now_ts: int,
  244. max_retry_times: int,
  245. lock_expire_before: int,
  246. ):
  247. query = f"""
  248. UPDATE {self.INNER_DECODE_CREATE_STATE}
  249. SET status = %s, locked_at = %s, updated_at = %s, last_error = NULL
  250. WHERE source_id = %s
  251. AND task_type = %s
  252. AND (
  253. status = %s
  254. OR (status = %s AND retry_count < %s)
  255. OR (status = %s AND locked_at > %s AND locked_at < %s)
  256. );
  257. """
  258. return await self.pool.async_save(
  259. query=query,
  260. params=(
  261. self.TaskStatus.PROCESSING,
  262. now_ts,
  263. now_ts,
  264. source_id,
  265. task_type,
  266. self.TaskStatus.INIT,
  267. self.TaskStatus.FAILED,
  268. max_retry_times,
  269. self.TaskStatus.PROCESSING,
  270. self.InnerCreateState.LOCKED_AT_CLEARED,
  271. lock_expire_before,
  272. ),
  273. )
  274. async def mark_create_success(
  275. self,
  276. source_id: str,
  277. task_type: int,
  278. remote_task_id: str,
  279. now_ts: int,
  280. remark: str = None,
  281. ):
  282. query = f"""
  283. UPDATE {self.INNER_DECODE_CREATE_STATE}
  284. SET status = %s,
  285. remote_task_id = %s,
  286. last_error = %s,
  287. locked_at = %s,
  288. updated_at = %s
  289. WHERE source_id = %s AND task_type = %s AND status = %s;
  290. """
  291. return await self.pool.async_save(
  292. query=query,
  293. params=(
  294. self.TaskStatus.SUCCESS,
  295. remote_task_id,
  296. remark,
  297. self.InnerCreateState.LOCKED_AT_CLEARED,
  298. now_ts,
  299. source_id,
  300. task_type,
  301. self.TaskStatus.PROCESSING,
  302. ),
  303. )
  304. async def mark_create_retry(
  305. self,
  306. source_id: str,
  307. task_type: int,
  308. now_ts: int,
  309. error_message: str,
  310. ):
  311. query = f"""
  312. UPDATE {self.INNER_DECODE_CREATE_STATE}
  313. SET status = %s,
  314. retry_count = retry_count + 1,
  315. last_error = %s,
  316. locked_at = %s,
  317. updated_at = %s
  318. WHERE source_id = %s AND task_type = %s AND status = %s;
  319. """
  320. return await self.pool.async_save(
  321. query=query,
  322. params=(
  323. self.TaskStatus.INIT,
  324. error_message,
  325. self.InnerCreateState.LOCKED_AT_CLEARED,
  326. now_ts,
  327. source_id,
  328. task_type,
  329. self.TaskStatus.PROCESSING,
  330. ),
  331. )
  332. async def mark_create_failed(
  333. self,
  334. source_id: str,
  335. task_type: int,
  336. now_ts: int,
  337. error_message: str,
  338. ):
  339. query = f"""
  340. UPDATE {self.INNER_DECODE_CREATE_STATE}
  341. SET status = %s,
  342. retry_count = retry_count + 1,
  343. last_error = %s,
  344. locked_at = %s,
  345. updated_at = %s
  346. WHERE source_id = %s AND task_type = %s AND status = %s;
  347. """
  348. return await self.pool.async_save(
  349. query=query,
  350. params=(
  351. self.TaskStatus.FAILED,
  352. error_message,
  353. self.InnerCreateState.LOCKED_AT_CLEARED,
  354. now_ts,
  355. source_id,
  356. task_type,
  357. self.TaskStatus.PROCESSING,
  358. ),
  359. )
# Public API of this module.
__all__ = [
    "ArticlesDecodeTaskMapper",
    "AdPlatformArticlesDecodeTaskMapper",
    "InnerArticlesDecodeTaskMapper",
]