|
|
@@ -3,8 +3,10 @@
|
|
|
|
|
|
提供:
|
|
|
1. API 接口:POST /api/tasks - 触发内容寻找任务
|
|
|
-2. 定时调度:每 10 分钟从数据库联表查询未处理需求并执行任务
|
|
|
-3. 并发控制:限制最大并发任务数
|
|
|
+2. 定时调度:启动后先恢复 demand_find_task 中 status=执行中 的任务;之后每 10 分钟从
|
|
|
+ demand_content 取当天(dt=YYYYMMDD)且未建任务记录的 1 条需求执行(不区分品类)
|
|
|
+3. 并发控制:限制最大并发任务数;定时侧若已有任务在执行则跳过本次轮询
|
|
|
+4. 单次寻找任务最长执行 15 分钟,超时记为失败并回写 demand_find_task
|
|
|
"""
|
|
|
|
|
|
import asyncio
|
|
|
@@ -27,7 +29,13 @@ from dotenv import load_dotenv
|
|
|
load_dotenv()
|
|
|
|
|
|
import core
|
|
|
-from db import get_daily_unprocessed_pool, create_task_record, update_task_status, update_task_on_complete
|
|
|
+from db import (
|
|
|
+ create_task_record,
|
|
|
+ get_first_running_task,
|
|
|
+ get_one_today_unprocessed_demand,
|
|
|
+ update_task_status,
|
|
|
+ update_task_on_complete,
|
|
|
+)
|
|
|
from db.schedule import STATUS_RUNNING, STATUS_SUCCESS, STATUS_FAILED
|
|
|
|
|
|
# 配置日志
|
|
|
@@ -59,6 +67,10 @@ scheduler = AsyncIOScheduler(timezone=ZoneInfo(SCHEDULER_TIMEZONE))
|
|
|
# Concurrency: upper bound on simultaneously running tasks, read from the
# MAX_CONCURRENT_TASKS environment variable (default 1), and the semaphore
# sized to that bound.
MAX_CONCURRENT_TASKS = int(os.environ.get("MAX_CONCURRENT_TASKS", "1"))
task_semaphore = asyncio.Semaphore(value=MAX_CONCURRENT_TASKS)

# Scheduling: polling interval (minutes) and per-task timeout
# (seconds, default 900 = 15 minutes).
SCHEDULE_INTERVAL_MINUTES = int(os.environ.get("SCHEDULE_INTERVAL_MINUTES", "10"))
TASK_TIMEOUT_SECONDS = int(os.environ.get("SCHEDULE_TASK_TIMEOUT_SECONDS", "900"))
|
|
|
+
|
|
|
# 统计信息
|
|
|
stats = {
|
|
|
"total_tasks": 0,
|
|
|
@@ -121,8 +133,11 @@ async def execute_task(
|
|
|
logger.warning(f"更新任务状态为执行中失败: {e}")
|
|
|
|
|
|
try:
|
|
|
- result = await core.run_agent(
|
|
|
- query, demand_id=demand_id, stream_output=False, log_assistant_text=True
|
|
|
+ result = await asyncio.wait_for(
|
|
|
+ core.run_agent(
|
|
|
+ query, demand_id=demand_id, stream_output=False, log_assistant_text=True
|
|
|
+ ),
|
|
|
+ timeout=float(TASK_TIMEOUT_SECONDS),
|
|
|
)
|
|
|
duration = (datetime.now() - start_time).total_seconds()
|
|
|
|
|
|
@@ -137,6 +152,15 @@ async def execute_task(
|
|
|
if task_type == "scheduled" and demand_id is not None:
|
|
|
_update_scheduled_task_complete(demand_id, result.get("trace_id") or "", STATUS_FAILED)
|
|
|
|
|
|
+ except asyncio.TimeoutError:
|
|
|
+ stats["failed_tasks"] += 1
|
|
|
+ duration = (datetime.now() - start_time).total_seconds()
|
|
|
+ logger.error(
|
|
|
+ f"任务超时 [{task_type}]: 超过 {TASK_TIMEOUT_SECONDS}s,记为失败, 耗时={duration:.1f}s"
|
|
|
+ )
|
|
|
+ if task_type == "scheduled" and demand_id is not None:
|
|
|
+ _update_scheduled_task_complete(demand_id, "", STATUS_FAILED)
|
|
|
+
|
|
|
except Exception as e:
|
|
|
stats["failed_tasks"] += 1
|
|
|
duration = (datetime.now() - start_time).total_seconds()
|
|
|
@@ -145,42 +169,59 @@ async def execute_task(
|
|
|
_update_scheduled_task_complete(demand_id, "", STATUS_FAILED)
|
|
|
|
|
|
|
|
|
-async def scheduled_task():
|
|
|
- """
|
|
|
- 定时任务:每天上午 6 点执行一次(生成当日池子并跑完)
|
|
|
def _today_dt_int() -> int:
    """Return today's date in the scheduler timezone as a YYYYMMDD integer.

    The result (e.g. 20260402) is the value passed as ``dt`` when querying
    demand_content, so it is computed in ``SCHEDULER_TIMEZONE`` rather than in
    the host's local time.
    """
    today = datetime.now(ZoneInfo(SCHEDULER_TIMEZONE))
    return today.year * 10000 + today.month * 100 + today.day
|
|
|
|
|
|
- 流程:
|
|
|
- 1. 从 demand_content 中按 merge_leve2 品类去重分组,每个品类取 score 最高且未处理过的 5 条
|
|
|
- 2. 全局最多取 20 条,作为当天“池子”
|
|
|
- 3. 为池子中每条 demand_content 创建 demand_find_task 记录,并执行任务(并发受 MAX_CONCURRENT_TASKS 限制)
|
|
|
+
|
|
|
async def scheduled_tick():
    """Run one polling round of the scheduler.

    The round is skipped when any concurrency permit is currently held.
    Otherwise one demand for today (``dt`` = YYYYMMDD) is requested from
    ``get_one_today_unprocessed_demand``; if a usable row comes back, a task
    record is created for it and the task is executed to completion before
    this coroutine returns.
    """
    logger.info("定时任务触发(scheduled_tick)")

    # A non-zero difference means at least one permit is held by a running task.
    # NOTE(review): `_value` is a private attribute of asyncio.Semaphore; there
    # is no public accessor for the number of free permits.
    permits_in_use = MAX_CONCURRENT_TASKS - task_semaphore._value
    if permits_in_use:
        logger.info("定时任务跳过:仍有任务在执行(并发槽已满)")
        return

    today = _today_dt_int()
    candidate = get_one_today_unprocessed_demand(dt=today)
    if not candidate:
        logger.info(f"定时任务跳过:无待处理需求(dt={today} 或均已建任务)")
        return

    demand_content_id = candidate.get("demand_content_id")
    raw_query = candidate.get("query")
    query = raw_query.strip() if raw_query else ""
    if not query or demand_content_id is None:
        logger.info("定时任务跳过:查询结果无效")
        return

    logger.info(f"定时任务领取:demand_content_id={demand_content_id}, dt={today}")
    # Register the task row first; execute_task reports the outcome afterwards.
    create_task_record(demand_content_id)
    await execute_task(
        query=query,
        demand_id=demand_content_id,
        task_type="scheduled",
    )
|
|
|
+
|
|
|
+
|
|
|
async def run_startup_resume():
    """Re-run a task that was still marked as running when the service started.

    Fetches a single row via ``get_first_running_task`` (status = running, 1)
    and executes it again as a scheduled task. Rows lacking a
    ``demand_content_id`` or a non-blank ``query`` are skipped with a warning.
    Any exception is logged with its traceback and not propagated.
    """
    try:
        running = get_first_running_task()
        if not running:
            logger.info("启动恢复:无执行中(status=1)的 demand_find_task")
            return

        demand_content_id = running.get("demand_content_id")
        query = running.get("query") or ""
        query = query.strip()

        incomplete = demand_content_id is None or not query
        if incomplete:
            logger.warning("启动恢复:执行中任务数据不完整,跳过")
            return

        logger.info(f"启动恢复:执行 demand_find_task status=1, demand_content_id={demand_content_id}")
        await execute_task(
            query=query,
            demand_id=int(demand_content_id),
            task_type="scheduled",
        )
    except Exception as exc:
        logger.error(f"启动恢复失败: {exc}", exc_info=True)
|
|
|
|
|
|
|
|
|
# ============ API 接口 ============
|
|
|
@@ -352,21 +393,23 @@ async def startup():
|
|
|
logger.info("内容寻找服务启动中...")
|
|
|
logger.info(f"最大并发任务数: {MAX_CONCURRENT_TASKS}")
|
|
|
logger.info(f"定时器时区: {SCHEDULER_TIMEZONE}")
|
|
|
+ logger.info(
|
|
|
+ f"定时策略:每 {SCHEDULE_INTERVAL_MINUTES} 分钟轮询当天需求;"
|
|
|
+ f"单次任务超时 {TASK_TIMEOUT_SECONDS}s"
|
|
|
+ )
|
|
|
+
|
|
|
+ asyncio.create_task(run_startup_resume())
|
|
|
|
|
|
- # 配置定时任务:每天上午 6 点触发一次
|
|
|
job = scheduler.add_job(
|
|
|
- scheduled_task,
|
|
|
- "cron",
|
|
|
- hour="6",
|
|
|
- minute="0",
|
|
|
- second="0",
|
|
|
+ scheduled_tick,
|
|
|
+ "interval",
|
|
|
+ minutes=SCHEDULE_INTERVAL_MINUTES,
|
|
|
misfire_grace_time=300,
|
|
|
coalesce=True,
|
|
|
+ max_instances=1,
|
|
|
)
|
|
|
scheduler.start()
|
|
|
logger.info(f"定时任务已注册: id={job.id}, next_run_time={job.next_run_time}")
|
|
|
- # asyncio.create_task(scheduled_task())
|
|
|
- # logger.info("定时任务已启动:启动后立即执行一次,之后每 10 分钟执行(从数据库获取待处理需求)")
|
|
|
|
|
|
logger.info("服务启动完成")
|
|
|
logger.info("=" * 60)
|