# task_manager_service.py (5.8 KB)
  1. import json
  2. from typing import Optional
  3. from app.core.config import GlobalConfigSettings
  4. from app.core.config.settings import TaskChineseNameConfig
  5. class TaskConst:
  6. INIT_STATUS = 0
  7. PROCESSING_STATUS = 1
  8. FINISHED_STATUS = 2
  9. FAILED_STATUS = 99
  10. STATUS_TEXT = {0: "初始化", 1: "处理中", 2: "完成", 99: "失败"}
  11. DEFAULT_PAGE = 1
  12. DEFAULT_SIZE = 50
  13. class TaskManagerUtils(TaskConst):
  14. def __init__(self, config: TaskChineseNameConfig):
  15. self.config = config
  16. def get_task_chinese_name(self, data):
  17. """
  18. 通过输入任务详情信息获取任务名称
  19. """
  20. task_name = data["task_name"]
  21. task_name_chinese = self.config.name_map.get(task_name, task_name)
  22. # account_method
  23. if task_name == "crawler_gzh_articles":
  24. account_method = data.get("account_method", "")
  25. account_method = account_method.replace(
  26. "account_association", "账号联想"
  27. ).replace("search", "")
  28. crawl_mode = data.get("crawl_mode", "")
  29. crawl_mode = crawl_mode.replace("search", "搜索").replace(
  30. "account", "抓账号"
  31. )
  32. strategy = data.get("strategy", "")
  33. return f"{task_name_chinese}\t{crawl_mode}\t{account_method}\t{strategy}"
  34. elif task_name == "article_pool_cold_start":
  35. platform = data.get("platform", "")
  36. platform = platform.replace("toutiao", "今日头条").replace("weixin", "微信")
  37. strategy = data.get("strategy", "")
  38. strategy = strategy.replace("strategy", "策略")
  39. category_list = data.get("category_list", [])
  40. category_list = "、".join(category_list)
  41. crawler_methods = data.get("crawler_methods", [])
  42. crawler_methods = "、".join(crawler_methods)
  43. return f"{task_name_chinese}\t{platform}\t{crawler_methods}\t{category_list}\t{strategy}"
  44. else:
  45. return task_name_chinese
  46. @staticmethod
  47. def _build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
  48. conds, params = [], []
  49. if id_eq is not None:
  50. conds.append("id = %s")
  51. params.append(id_eq)
  52. if date_string: # 字符串非空
  53. conds.append("date_string = %s")
  54. params.append(date_string)
  55. if trace_id:
  56. conds.append("trace_id LIKE %s")
  57. # 如果调用方已经传了 %,就原样用;否则自动做包含匹配
  58. params.append(trace_id if "%" in trace_id else f"%{trace_id}%")
  59. if task_status is not None:
  60. conds.append("task_status = %s")
  61. params.append(task_status)
  62. where_clause = " AND ".join(conds) if conds else "1=1"
  63. return where_clause, params
  64. @staticmethod
  65. def _safe_json(v):
  66. try:
  67. if isinstance(v, (str, bytes, bytearray)):
  68. return json.loads(v)
  69. return v or {}
  70. except Exception:
  71. return {}
  72. class TaskManager(TaskManagerUtils):
  73. def __init__(self, pool, data, config: GlobalConfigSettings):
  74. super().__init__(config.task_chinese_name)
  75. self.pool = pool
  76. self.data = data
  77. async def list_tasks(self):
  78. page = self.data.get("page", self.DEFAULT_PAGE)
  79. page_size = self.data.get("size", self.DEFAULT_SIZE)
  80. sort_by = self.data.get("sort_by", "id")
  81. sort_dir = self.data.get("sort_dir", "desc").lower()
  82. # 过滤条件
  83. id_eq: Optional[int] = self.data.get("id") and int(self.data.get("id"))
  84. date_string: Optional[str] = self.data.get("date_string")
  85. trace_id: Optional[str] = self.data.get("trace_id")
  86. task_status: Optional[int] = self.data.get("task_status") and int(
  87. self.data.get("task_status")
  88. )
  89. # 1) WHERE 子句
  90. where_clause, params = self._build_where(
  91. id_eq, date_string, trace_id, task_status
  92. )
  93. sort_whitelist = {
  94. "id",
  95. "date_string",
  96. "task_status",
  97. "start_timestamp",
  98. "finish_timestamp",
  99. }
  100. sort_by = sort_by if sort_by in sort_whitelist else "id"
  101. sort_dir = "ASC" if str(sort_dir).lower() == "asc" else "DESC"
  102. # 3) 分页(边界保护)
  103. page = max(1, int(page))
  104. page_size = max(1, min(int(page_size), 200)) # 适当限流
  105. offset = (page - 1) * page_size
  106. # 4) 统计总数(注意:WHERE 片段直接插入,值用参数化)
  107. sql_count = f"""
  108. SELECT COUNT(1) AS cnt
  109. FROM long_articles_task_manager
  110. WHERE {where_clause}
  111. """
  112. count_rows = await self.pool.async_fetch(query=sql_count, params=tuple(params))
  113. total = count_rows[0]["cnt"] if count_rows else 0
  114. # 5) 查询数据
  115. sql_list = f"""
  116. SELECT id, date_string, task_status, start_timestamp, finish_timestamp, trace_id, data
  117. FROM long_articles_task_manager
  118. WHERE {where_clause}
  119. ORDER BY {sort_by} {sort_dir}
  120. LIMIT %s OFFSET %s
  121. """
  122. list_params = (*params, page_size, offset)
  123. rows = await self.pool.async_fetch(query=sql_list, params=list_params)
  124. items = [
  125. {
  126. **r,
  127. "status_text": self.STATUS_TEXT.get(
  128. r["task_status"], str(r["task_status"])
  129. ),
  130. "task_name": self.get_task_chinese_name(self._safe_json(r["data"])),
  131. }
  132. for r in rows
  133. ]
  134. return {"total": total, "page": page, "page_size": page_size, "items": items}
  135. async def get_task(self, task_id: int):
  136. pass
  137. async def retry_task(self, task_id: int):
  138. pass
  139. async def cancel_task(self, task_id: int):
  140. pass