task_manager_service.py 5.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158
  1. import json
  2. from typing import Optional
  3. from app.core.config import GlobalConfigSettings
  4. def _build_where(id_eq=None, date_string=None, trace_id=None, task_status=None):
  5. conds, params = [], []
  6. if id_eq is not None:
  7. conds.append("id = %s")
  8. params.append(id_eq)
  9. if date_string: # 字符串非空
  10. conds.append("date_string = %s")
  11. params.append(date_string)
  12. if trace_id:
  13. conds.append("trace_id LIKE %s")
  14. # 如果调用方已经传了 %,就原样用;否则自动做包含匹配
  15. params.append(trace_id if "%" in trace_id else f"%{trace_id}%")
  16. if task_status is not None:
  17. conds.append("task_status = %s")
  18. params.append(task_status)
  19. where_clause = " AND ".join(conds) if conds else "1=1"
  20. return where_clause, params
  21. def _safe_json(v):
  22. try:
  23. if isinstance(v, (str, bytes, bytearray)):
  24. return json.loads(v)
  25. return v or {}
  26. except Exception:
  27. return {}
  class TaskConst:
      """Shared constants: task status codes, their labels, and paging defaults."""

      # Task status codes.
      INIT_STATUS = 0
      PROCESSING_STATUS = 1
      FINISHED_STATUS = 2
      FAILED_STATUS = 99

      # Display label for each status code.
      STATUS_TEXT = {
          INIT_STATUS: "初始化",
          PROCESSING_STATUS: "处理中",
          FINISHED_STATUS: "完成",
          FAILED_STATUS: "失败",
      }

      # Pagination defaults used when the caller supplies none.
      DEFAULT_PAGE = 1
      DEFAULT_SIZE = 50
  class TaskManagerUtils(TaskConst):
      """Formatting helpers that turn a task's detail payload into a display name."""

      def __init__(self, config: GlobalConfigSettings):
          # ``config.task_chinese_name`` is read as a mapping of
          # task name -> display name (accessed via ``.get``).
          self.config = config

      @staticmethod
      def _text(data, key):
          """Return ``data[key]`` as text; a missing or ``None`` value becomes ``""``."""
          value = data.get(key)
          return "" if value is None else str(value)

      @staticmethod
      def _joined(data, key):
          """Join the items of ``data[key]`` with "、"; missing/``None`` becomes ``""``."""
          values = data.get(key) or []
          # Skip None items and stringify the rest so one odd item cannot
          # break rendering.
          return "、".join(str(item) for item in values if item is not None)

      def get_task_chinese_name(self, data):
          """Build the display name of a task from its detail payload.

          The base name is looked up in ``config.task_chinese_name`` and falls
          back to the raw ``task_name``. Two task types append tab-separated
          details:

          * ``crawler_gzh_articles``: crawl mode, account method, strategy.
          * ``article_pool_cold_start``: platform, crawler methods, categories,
            strategy.

          Args:
              data: Task detail mapping. A non-dict value, a missing
                  ``task_name``, or ``None`` field values are tolerated and
                  rendered as empty text instead of raising.

          Returns:
              The display name string.
          """
          # Payloads come from loosely validated JSON; never let one bad
          # record raise while rendering a name.
          if not isinstance(data, dict):
              data = {}

          task_name = data.get("task_name", "")
          task_name_chinese = self.config.task_chinese_name.get(task_name, task_name)

          if task_name == "crawler_gzh_articles":
              # "search" is blanked out in account_method, not translated.
              account_method = (
                  self._text(data, "account_method")
                  .replace("account_association", "账号联想")
                  .replace("search", "")
              )
              crawl_mode = (
                  self._text(data, "crawl_mode")
                  .replace("search", "搜索")
                  .replace("account", "抓账号")
              )
              strategy = self._text(data, "strategy")
              return f"{task_name_chinese}\t{crawl_mode}\t{account_method}\t{strategy}"

          if task_name == "article_pool_cold_start":
              platform = (
                  self._text(data, "platform")
                  .replace("toutiao", "今日头条")
                  .replace("weixin", "微信")
              )
              strategy = self._text(data, "strategy").replace("strategy", "策略")
              category_list = self._joined(data, "category_list")
              crawler_methods = self._joined(data, "crawler_methods")
              return f"{task_name_chinese}\t{platform}\t{crawler_methods}\t{category_list}\t{strategy}"

          return task_name_chinese
  class TaskManagerService(TaskConst):
      """Query service over the ``long_articles_task_manager`` table.

      Args:
          pool: Database helper exposing ``async_fetch(query=..., params=...)``.
          data: Request parameters (paging, sorting, and filter values).
          config: Optional settings object handed to ``TaskManagerUtils`` to
              translate task names. When omitted, the raw ``task_name`` from
              each row's payload is shown instead.
      """

      # Columns a caller may sort by; anything else falls back to "id".
      SORT_WHITELIST = frozenset(
          {"id", "date_string", "task_status", "start_timestamp", "finish_timestamp"}
      )
      # Upper bound on page size, to limit the load of a single request.
      MAX_PAGE_SIZE = 200

      def __init__(self, pool, data, config=None):
          self.pool = pool
          self.data = data
          self.config = config

      @staticmethod
      def _optional_int(value):
          """Return ``int(value)``, or ``None`` when the value is absent (``None``/``""``).

          Raises:
              ValueError: If a non-empty value is not a valid integer.
          """
          if value is None or value == "":
              return None
          return int(value)

      def _make_name_resolver(self):
          """Return a callable mapping a decoded row payload to a display task name."""
          utils = TaskManagerUtils(self.config) if self.config is not None else None

          def resolve(payload):
              # Rows whose payload failed to decode, or lacks a name, get "".
              if not isinstance(payload, dict) or "task_name" not in payload:
                  return ""
              if utils is None:
                  return payload["task_name"]
              return utils.get_task_chinese_name(payload)

          return resolve

      async def list_tasks(self):
          """Return one page of tasks matching the filters in ``self.data``.

          Recognized keys in ``self.data``: ``page``, ``size``, ``sort_by``,
          ``sort_dir``, ``id``, ``date_string``, ``trace_id``, ``task_status``.

          Returns:
              ``{"total", "page", "page_size", "items"}`` where each item is the
              row plus ``status_text`` and ``task_name``.

          Raises:
              ValueError: If ``page``, ``size``, ``id`` or ``task_status`` is
                  non-empty but not an integer.
          """
          # Filters: empty strings are treated as "not provided".
          id_eq: Optional[int] = self._optional_int(self.data.get("id"))
          date_string: Optional[str] = self.data.get("date_string")
          trace_id: Optional[str] = self.data.get("trace_id")
          task_status: Optional[int] = self._optional_int(self.data.get("task_status"))

          # 1) WHERE clause (values stay parameterized).
          where_clause, params = _build_where(id_eq, date_string, trace_id, task_status)

          # 2) Sorting: only whitelisted columns and ASC/DESC ever reach the SQL.
          sort_by = self.data.get("sort_by")
          if not (isinstance(sort_by, str) and sort_by in self.SORT_WHITELIST):
              sort_by = "id"
          sort_dir = "ASC" if str(self.data.get("sort_dir") or "").lower() == "asc" else "DESC"

          # 3) Pagination with boundary protection.
          page = self._optional_int(self.data.get("page"))
          page = max(1, self.DEFAULT_PAGE if page is None else page)
          size = self._optional_int(self.data.get("size"))
          size = self.DEFAULT_SIZE if size is None else size
          page_size = max(1, min(size, self.MAX_PAGE_SIZE))
          offset = (page - 1) * page_size

          # 4) Total count. The WHERE fragment consists of fixed strings from
          #    _build_where, so interpolating it is safe.
          sql_count = f"""
              SELECT COUNT(1) AS cnt
              FROM long_articles_task_manager
              WHERE {where_clause}
          """
          count_rows = await self.pool.async_fetch(query=sql_count, params=tuple(params))
          total = count_rows[0]["cnt"] if count_rows else 0

          # 5) Page of rows.
          sql_list = f"""
              SELECT id, date_string, task_status, start_timestamp, finish_timestamp, trace_id, data
              FROM long_articles_task_manager
              WHERE {where_clause}
              ORDER BY {sort_by} {sort_dir}
              LIMIT %s OFFSET %s
          """
          list_params = (*params, page_size, offset)
          rows = await self.pool.async_fetch(query=sql_list, params=list_params)

          resolve_name = self._make_name_resolver()
          items = [
              {
                  **row,
                  "status_text": self.STATUS_TEXT.get(
                      row["task_status"], str(row["task_status"])
                  ),
                  "task_name": resolve_name(_safe_json(row["data"])),
              }
              # Tolerate a fetch helper that returns None instead of a list.
              for row in (rows or [])
          ]
          return {"total": total, "page": page, "page_size": page_size, "items": items}

      async def get_task(self, task_id: int):
          """Placeholder: not implemented yet; returns ``None``."""
          pass

      async def retry_task(self, task_id: int):
          """Placeholder: not implemented yet; returns ``None``."""
          pass

      async def cancel_task(self, task_id: int):
          """Placeholder: not implemented yet; returns ``None``."""
          pass