# task_manager_service.py (5.7 KB)
import json
import logging
from typing import Optional

from app.core.config import GlobalConfigSettings
  4. class TaskConst:
  5. INIT_STATUS = 0
  6. PROCESSING_STATUS = 1
  7. FINISHED_STATUS = 2
  8. FAILED_STATUS = 99
  9. STATUS_TEXT = {0: "初始化", 1: "处理中", 2: "完成", 99: "失败"}
  10. DEFAULT_PAGE = 1
  11. DEFAULT_SIZE = 50
  12. class TaskManagerUtils(TaskConst):
  13. def __init__(self, config: GlobalConfigSettings):
  14. self.config = config
  15. def get_task_chinese_name(self, data):
  16. """
  17. 通过输入任务详情信息获取任务名称
  18. """
  19. task_name = data["task_name"]
  20. task_name_chinese = self.config.task_chinese_name.get(task_name, task_name)
  21. # account_method
  22. if task_name == "crawler_gzh_articles":
  23. account_method = data.get("account_method", "")
  24. account_method = account_method.replace(
  25. "account_association", "账号联想"
  26. ).replace("search", "")
  27. crawl_mode = data.get("crawl_mode", "")
  28. crawl_mode = crawl_mode.replace("search", "搜索").replace(
  29. "account", "抓账号"
  30. )
  31. strategy = data.get("strategy", "")
  32. return f"{task_name_chinese}\t{crawl_mode}\t{account_method}\t{strategy}"
  33. elif task_name == "article_pool_cold_start":
  34. platform = data.get("platform", "")
  35. platform = platform.replace("toutiao", "今日头条").replace("weixin", "微信")
  36. strategy = data.get("strategy", "")
  37. strategy = strategy.replace("strategy", "策略")
  38. category_list = data.get("category_list", [])
  39. category_list = "、".join(category_list)
  40. crawler_methods = data.get("crawler_methods", [])
  41. crawler_methods = "、".join(crawler_methods)
  42. return f"{task_name_chinese}\t{platform}\t{crawler_methods}\t{category_list}\t{strategy}"
  43. else:
  44. return task_name_chinese
  45. @staticmethod
  46. def _build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
  47. conds, params = [], []
  48. if id_eq is not None:
  49. conds.append("id = %s")
  50. params.append(id_eq)
  51. if date_string: # 字符串非空
  52. conds.append("date_string = %s")
  53. params.append(date_string)
  54. if trace_id:
  55. conds.append("trace_id LIKE %s")
  56. # 如果调用方已经传了 %,就原样用;否则自动做包含匹配
  57. params.append(trace_id if "%" in trace_id else f"%{trace_id}%")
  58. if task_status is not None:
  59. conds.append("task_status = %s")
  60. params.append(task_status)
  61. where_clause = " AND ".join(conds) if conds else "1=1"
  62. return where_clause, params
  63. @staticmethod
  64. def _safe_json(v):
  65. try:
  66. if isinstance(v, (str, bytes, bytearray)):
  67. return json.loads(v)
  68. return v or {}
  69. except Exception:
  70. return {}
  71. class TaskManager(TaskManagerUtils):
  72. def __init__(self, pool, data, config: GlobalConfigSettings):
  73. super().__init__(config)
  74. self.pool = pool
  75. self.data = data
  76. async def list_tasks(self):
  77. page = self.data.get("page", self.DEFAULT_PAGE)
  78. page_size = self.data.get("size", self.DEFAULT_SIZE)
  79. sort_by = self.data.get("sort_by", "id")
  80. sort_dir = self.data.get("sort_dir", "desc").lower()
  81. # 过滤条件
  82. id_eq: Optional[int] = self.data.get("id") and int(self.data.get("id"))
  83. date_string: Optional[str] = self.data.get("date_string")
  84. trace_id: Optional[str] = self.data.get("trace_id")
  85. task_status: Optional[int] = self.data.get("task_status") and int(
  86. self.data.get("task_status")
  87. )
  88. # 1) WHERE 子句
  89. where_clause, params = self._build_where(id_eq, date_string, trace_id, task_status)
  90. sort_whitelist = {
  91. "id",
  92. "date_string",
  93. "task_status",
  94. "start_timestamp",
  95. "finish_timestamp",
  96. }
  97. sort_by = sort_by if sort_by in sort_whitelist else "id"
  98. sort_dir = "ASC" if str(sort_dir).lower() == "asc" else "DESC"
  99. # 3) 分页(边界保护)
  100. page = max(1, int(page))
  101. page_size = max(1, min(int(page_size), 200)) # 适当限流
  102. offset = (page - 1) * page_size
  103. # 4) 统计总数(注意:WHERE 片段直接插入,值用参数化)
  104. sql_count = f"""
  105. SELECT COUNT(1) AS cnt
  106. FROM long_articles_task_manager
  107. WHERE {where_clause}
  108. """
  109. count_rows = await self.pool.async_fetch(query=sql_count, params=tuple(params))
  110. total = count_rows[0]["cnt"] if count_rows else 0
  111. # 5) 查询数据
  112. sql_list = f"""
  113. SELECT id, date_string, task_status, start_timestamp, finish_timestamp, trace_id, data
  114. FROM long_articles_task_manager
  115. WHERE {where_clause}
  116. ORDER BY {sort_by} {sort_dir}
  117. LIMIT %s OFFSET %s
  118. """
  119. list_params = (*params, page_size, offset)
  120. rows = await self.pool.async_fetch(query=sql_list, params=list_params)
  121. items = [
  122. {
  123. **r,
  124. "status_text": self.STATUS_TEXT.get(
  125. r["task_status"], str(r["task_status"])
  126. ),
  127. "task_name": self.get_task_chinese_name(self._safe_json(r["data"])),
  128. }
  129. for r in rows
  130. ]
  131. return {"total": total, "page": page, "page_size": page_size, "items": items}
  132. async def get_task(self, task_id: int):
  133. pass
  134. async def retry_task(self, task_id: int):
  135. pass
  136. async def cancel_task(self, task_id: int):
  137. pass