# _mapper.py — database mappers for article decode tasks.
from typing import Dict, List, Optional

from app.core.database import DatabaseManager

from ._const import DecodeTaskConst
  4. class ArticlesDecodeTaskMapper(DecodeTaskConst):
  5. DECODE_TASK_QUEUE = "long_articles_new_decode_tasks"
  6. INNER_DECODE_CREATE_STATE = "long_articles_inner_decode_create_state"
  7. def __init__(self, pool: DatabaseManager):
  8. self.pool = pool
  9. # 存储解构任务
  10. async def record_decode_task(
  11. self, task_id: str, content_id: str, task_type: int, payload: str, remark: str = None
  12. ) -> int:
  13. query = f"""
  14. INSERT INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  15. VALUES (%s, %s, %s, %s, %s)
  16. """
  17. return await self.pool.async_save(query=query, params=(task_id, content_id, task_type, payload, remark))
  18. async def record_decode_task_if_absent(
  19. self, task_id: str, content_id: str, task_type: int, payload: str, remark: str = None
  20. ) -> int:
  21. query = f"""
  22. INSERT IGNORE INTO {self.DECODE_TASK_QUEUE} (task_id, content_id, task_type, payload, remark)
  23. VALUES (%s, %s, %s, %s, %s)
  24. """
  25. return await self.pool.async_save(
  26. query=query, params=(task_id, content_id, task_type, payload, remark)
  27. )
  28. # 更新解构任务状态
  29. async def update_decode_task_status(
  30. self, task_id: str, ori_status: int, new_status: int, remark: str = None
  31. ) -> int:
  32. query = f"""
  33. UPDATE {self.DECODE_TASK_QUEUE}
  34. SET status = %s, remark = %s
  35. WHERE task_id = %s AND status = %s;
  36. """
  37. return await self.pool.async_save(
  38. query=query, params=(new_status, remark, task_id, ori_status)
  39. )
  40. # 设置解构结果
  41. async def set_decode_result(
  42. self, task_id: str, result: str, remark: str = None
  43. ) -> int:
  44. query = f"""
  45. UPDATE {self.DECODE_TASK_QUEUE}
  46. SET status = %s, remark = %s, result = %s
  47. WHERE task_id = %s AND status = %s;
  48. """
  49. return await self.pool.async_save(
  50. query=query,
  51. params=(
  52. self.TaskStatus.SUCCESS,
  53. remark,
  54. result,
  55. task_id,
  56. self.TaskStatus.PROCESSING,
  57. ),
  58. )
  59. # 获取待拉取结果的解构任务(status=INIT,尚未拿到解构结果)
  60. async def fetch_decoding_tasks(self) -> List[Dict]:
  61. query = f"""
  62. SELECT task_id FROM {self.DECODE_TASK_QUEUE} WHERE status = %s LIMIT %s;
  63. """
  64. return await self.pool.async_fetch(
  65. query=query, params=(self.TaskStatus.INIT, self.TASK_BATCH)
  66. )
  67. # 获取待解析的任务(获取处理成功的任务)
  68. async def fetch_extract_tasks(self):
  69. query = f"""
  70. SELECT id, result FROM {self.DECODE_TASK_QUEUE}
  71. WHERE extract_status = %s AND status = %s;
  72. """
  73. return await self.pool.async_fetch(
  74. query=query, params=(self.ExtractStatus.INIT, self.TaskStatus.SUCCESS)
  75. )
  76. # 修改解析状态(用于加锁与状态流转)
  77. async def update_extract_status(self, task_id, ori_status, new_status):
  78. query = f"""
  79. UPDATE {self.DECODE_TASK_QUEUE}
  80. SET extract_status = %s WHERE extract_status = %s AND id = %s;
  81. """
  82. return await self.pool.async_save(
  83. query=query,
  84. params=(
  85. new_status,
  86. ori_status,
  87. task_id,
  88. ),
  89. )
  90. # 记录解析结果明细到 long_articles_decode_task_detail
  91. async def record_extract_detail(self, decode_task_id: int, detail: Dict) -> int:
  92. query = """
  93. INSERT INTO long_articles_decode_task_detail
  94. (decode_task_id, inspiration, purpose, key_point, topic)
  95. VALUES (%s, %s, %s, %s, %s);
  96. """
  97. return await self.pool.async_save(
  98. query=query,
  99. params=(
  100. decode_task_id,
  101. detail.get("inspiration", ""),
  102. detail.get("purpose", ""),
  103. detail.get("key_point", ""),
  104. detail.get("topic", ""),
  105. ),
  106. )
  107. # 判断是否存在相同的任务 id
  108. async def fetch_exist_source_id(self, source_id, task_type):
  109. query = f"""
  110. SELECT id FROM {self.DECODE_TASK_QUEUE}
  111. WHERE content_id = %s AND task_type = %s;
  112. """
  113. return await self.pool.async_fetch(
  114. query=query, params=(source_id, task_type)
  115. )
  116. class AdPlatformArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  117. def __init__(self, pool: DatabaseManager):
  118. super().__init__(pool)
  119. async def record_decode_task(
  120. self, task_id: str, wx_sn: str, remark: str = None
  121. ) -> int:
  122. return await super().record_decode_task(
  123. task_id=task_id,
  124. content_id=wx_sn,
  125. task_type=self.TaskType.SOURCE_IMAGES_TEXT,
  126. payload="{}",
  127. remark=remark,
  128. )
  129. # 修改文章解构状态
  130. async def update_article_decode_status(
  131. self, id_: int, ori_status: int, new_status: int
  132. ) -> int:
  133. query = """
  134. UPDATE ad_platform_accounts_daily_detail
  135. SET decode_status = %s
  136. WHERE id = %s AND decode_status = %s;
  137. """
  138. return await self.pool.async_save(
  139. query=query, params=(new_status, id_, ori_status)
  140. )
  141. # 获取待解构文章
  142. async def fetch_decode_articles(self) -> List[Dict]:
  143. query = """
  144. SELECT id, account_name, gh_id, article_title, article_cover,
  145. article_text, article_images, wx_sn
  146. FROM ad_platform_accounts_daily_detail WHERE fetch_status = %s AND decode_status = %s
  147. LIMIT %s;
  148. """
  149. return await self.pool.async_fetch(
  150. query=query,
  151. params=(self.TaskStatus.SUCCESS, self.TaskStatus.INIT, self.TASK_BATCH),
  152. )
  153. class InnerArticlesDecodeTaskMapper(ArticlesDecodeTaskMapper):
  154. def __init__(self, pool: DatabaseManager):
  155. super().__init__(pool)
  156. # 获取内部文章
  157. async def fetch_inner_articles(self, date_string="20260401"):
  158. query = """
  159. SELECT title, source_id, wx_sn, cover_img_url FROM long_articles_good_read_article WHERE dt = %s
  160. AND source_id IN ('20260222161421405412536', '20241011051312526697231', '20241125113847098601958', '20241011042712648349812')
  161. ORDER by max_read_rate DESC LIMIT %s;
  162. """
  163. return await self.pool.async_fetch(
  164. query=query, params=(date_string, 20)
  165. )
  166. # 获取内部文章生成信息
  167. async def fetch_inner_articles_produce_detail(self, source_id) -> List[Dict]:
  168. query = """
  169. SELECT produce_module_type, output
  170. FROM produce_plan_module_output WHERE plan_exe_id = %s
  171. AND produce_module_type in (1,2,3,4,18);
  172. """
  173. return await self.pool.async_fetch(
  174. query=query, db_name="aigc", params=(source_id,)
  175. )
  176. # 获取文章源信息
  177. async def fetch_article_crawler_source_info(self, source_id: str):
  178. query = """
  179. SELECT
  180. t2.channel_content_id, t3.body_text
  181. FROM produce_plan_exe_record t1
  182. LEFT JOIN produce_plan_exe_refer_content t2 ON t2.plan_exe_id = t1.plan_exe_id
  183. LEFT JOIN crawler_content_blob t3 ON t3.channel_content_id = t2.channel_content_id
  184. WHERE
  185. t1.plan_exe_id = %s;
  186. """
  187. return await self.pool.async_fetch(
  188. query=query, db_name="aigc", params=(source_id,)
  189. )
  190. async def init_create_state(self, source_id: str, task_type: int, now_ts: int):
  191. query = f"""
  192. INSERT IGNORE INTO {self.INNER_DECODE_CREATE_STATE}
  193. (source_id, task_type, status, retry_count, locked_at, created_at, updated_at)
  194. VALUES (%s, %s, %s, %s, %s, %s, %s);
  195. """
  196. return await self.pool.async_save(
  197. query=query,
  198. params=(
  199. source_id,
  200. task_type,
  201. self.TaskStatus.INIT,
  202. 0,
  203. 0,
  204. now_ts,
  205. now_ts,
  206. ),
  207. )
  208. async def fetch_create_state(self, source_id: str, task_type: int):
  209. query = f"""
  210. SELECT source_id, task_type, status, retry_count, locked_at, remote_task_id, last_error
  211. FROM {self.INNER_DECODE_CREATE_STATE}
  212. WHERE source_id = %s AND task_type = %s
  213. LIMIT 1;
  214. """
  215. rows = await self.pool.async_fetch(query=query, params=(source_id, task_type))
  216. if not rows:
  217. return None
  218. return rows[0]
  219. async def acquire_create_lock(
  220. self,
  221. source_id: str,
  222. task_type: int,
  223. now_ts: int,
  224. max_retry_times: int,
  225. lock_expire_before: int,
  226. ):
  227. query = f"""
  228. UPDATE {self.INNER_DECODE_CREATE_STATE}
  229. SET status = %s, locked_at = %s, updated_at = %s, last_error = NULL
  230. WHERE source_id = %s
  231. AND task_type = %s
  232. AND (
  233. status = %s
  234. OR (status = %s AND retry_count < %s)
  235. OR (status = %s AND locked_at > 0 AND locked_at < %s)
  236. );
  237. """
  238. return await self.pool.async_save(
  239. query=query,
  240. params=(
  241. self.TaskStatus.PROCESSING,
  242. now_ts,
  243. now_ts,
  244. source_id,
  245. task_type,
  246. self.TaskStatus.INIT,
  247. self.TaskStatus.FAILED,
  248. max_retry_times,
  249. self.TaskStatus.PROCESSING,
  250. lock_expire_before,
  251. ),
  252. )
  253. async def mark_create_success(
  254. self,
  255. source_id: str,
  256. task_type: int,
  257. remote_task_id: str,
  258. now_ts: int,
  259. remark: str = None,
  260. ):
  261. query = f"""
  262. UPDATE {self.INNER_DECODE_CREATE_STATE}
  263. SET status = %s,
  264. remote_task_id = %s,
  265. last_error = %s,
  266. locked_at = 0,
  267. updated_at = %s
  268. WHERE source_id = %s AND task_type = %s AND status = %s;
  269. """
  270. return await self.pool.async_save(
  271. query=query,
  272. params=(
  273. self.TaskStatus.SUCCESS,
  274. remote_task_id,
  275. remark,
  276. now_ts,
  277. source_id,
  278. task_type,
  279. self.TaskStatus.PROCESSING,
  280. ),
  281. )
  282. async def mark_create_retry(
  283. self,
  284. source_id: str,
  285. task_type: int,
  286. now_ts: int,
  287. error_message: str,
  288. ):
  289. query = f"""
  290. UPDATE {self.INNER_DECODE_CREATE_STATE}
  291. SET status = %s,
  292. retry_count = retry_count + 1,
  293. last_error = %s,
  294. locked_at = 0,
  295. updated_at = %s
  296. WHERE source_id = %s AND task_type = %s AND status = %s;
  297. """
  298. return await self.pool.async_save(
  299. query=query,
  300. params=(
  301. self.TaskStatus.INIT,
  302. error_message,
  303. now_ts,
  304. source_id,
  305. task_type,
  306. self.TaskStatus.PROCESSING,
  307. ),
  308. )
  309. async def mark_create_failed(
  310. self,
  311. source_id: str,
  312. task_type: int,
  313. now_ts: int,
  314. error_message: str,
  315. ):
  316. query = f"""
  317. UPDATE {self.INNER_DECODE_CREATE_STATE}
  318. SET status = %s,
  319. retry_count = retry_count + 1,
  320. last_error = %s,
  321. locked_at = 0,
  322. updated_at = %s
  323. WHERE source_id = %s AND task_type = %s AND status = %s;
  324. """
  325. return await self.pool.async_save(
  326. query=query,
  327. params=(
  328. self.TaskStatus.FAILED,
  329. error_message,
  330. now_ts,
  331. source_id,
  332. task_type,
  333. self.TaskStatus.PROCESSING,
  334. ),
  335. )
  336. __all__ = [
  337. "ArticlesDecodeTaskMapper",
  338. "AdPlatformArticlesDecodeTaskMapper",
  339. "InnerArticlesDecodeTaskMapper",
  340. ]