from tqdm.asyncio import tqdm

from applications.utils.http import AsyncHttpClient


class AutoRechunkTask:
    """Fetch rows from the ``contents`` table and POST each one to a chunk API.

    The task reads every row whose ``status`` column equals 0 and sends the
    row, as a JSON body, to the chunking endpoint one request at a time.

    Args:
        mysql_client: Object exposing an awaitable
            ``async_fetch(query, params=...)`` method.
        chunk_url: Optional endpoint override. When omitted (or falsy) the
            class-level ``DEFAULT_CHUNK_URL`` is used, which matches the
            previously hard-coded address.
    """

    # Endpoint used when the caller does not supply one.
    DEFAULT_CHUNK_URL = "http://192.168.100.31:8001/api/chunk"

    def __init__(self, mysql_client, chunk_url=None):
        self.mysql_client = mysql_client
        self.chunk_url = chunk_url or self.DEFAULT_CHUNK_URL

    async def get_tasks(self):
        """Return the rows of ``contents`` whose ``status`` is 0.

        Returns:
            Whatever ``mysql_client.async_fetch`` yields for the query; each
            row carries ``doc_id``, ``title``, ``text``, ``text_type`` and
            ``dataset_id``.
        """
        # NOTE(review): status 0 presumably marks not-yet-chunked documents;
        # confirm against the table's status definitions.
        query = (
            " SELECT doc_id, title, text, text_type, dataset_id"
            " FROM contents WHERE status = %s; "
        )
        return await self.mysql_client.async_fetch(query, params=(0,))

    @staticmethod
    async def rechunk(task, url=None):
        """POST a single task to the chunk API and return the response.

        Args:
            task: Row to submit; it is passed unchanged as the JSON body.
            url: Endpoint to call. Defaults to ``DEFAULT_CHUNK_URL`` so that
                existing ``rechunk(task)`` callers keep working.

        Returns:
            The value returned by ``AsyncHttpClient.post``.
        """
        target = url or AutoRechunkTask.DEFAULT_CHUNK_URL
        async with AsyncHttpClient() as http_client:
            return await http_client.post(
                target,
                json=task,
                headers={"Content-Type": "application/json"},
            )

    async def deal(self):
        """Submit every pending row for re-chunking, sequentially.

        Returns:
            The number of rows that were submitted (0 when nothing was
            fetched).
        """
        tasks = await self.get_tasks()
        # Guard against an empty or None fetch result: iterating or calling
        # len() on None would raise TypeError.
        # NOTE(review): whether async_fetch can return None is not visible
        # here; the guard is harmless if it never does.
        if not tasks:
            return 0

        for task in tqdm(tasks, desc="rechunk each doc"):
            await self.rechunk(task, url=self.chunk_url)
        return len(tasks)