123456789101112131415161718192021222324252627282930313233 |
- from tqdm.asyncio import tqdm
- from applications.utils.http import AsyncHttpClient
class AutoRechunkTask:
    """Fetch rows from the ``contents`` table and post each one to the chunk API.

    The MySQL client is injected; it must expose an awaitable
    ``async_fetch(query, params=...)`` method.
    """

    # Default chunk-service endpoint. Kept as a class attribute so it can be
    # changed in one place (or overridden per call via ``rechunk(url=...)``)
    # instead of being hard-coded inside the request logic.
    CHUNK_API_URL = "http://192.168.100.31:8001/api/chunk"

    def __init__(self, mysql_client):
        """Store the MySQL client used by :meth:`get_tasks`."""
        self.mysql_client = mysql_client

    async def get_tasks(self):
        """Return the rows of ``contents`` whose ``status`` equals 0.

        Each row carries ``doc_id``, ``title``, ``text``, ``text_type`` and
        ``dataset_id``. The container/row types are whatever
        ``mysql_client.async_fetch`` returns.
        """
        # NOTE(review): status 0 presumably marks documents still waiting to
        # be chunked -- confirm against the table's status definitions.
        query = """
            SELECT doc_id, title, text, text_type, dataset_id
            FROM contents
            WHERE status = %s;
        """
        return await self.mysql_client.async_fetch(query, params=(0,))

    @staticmethod
    async def rechunk(task, url=None):
        """POST one task to the chunk API as JSON and return the response.

        Args:
            task: Payload sent as the JSON request body.
            url: Optional endpoint override; defaults to ``CHUNK_API_URL``.

        Returns:
            Whatever ``AsyncHttpClient.post`` returns for the request.
        """
        endpoint = url or AutoRechunkTask.CHUNK_API_URL
        async with AsyncHttpClient() as http_client:
            response = await http_client.post(
                endpoint, json=task, headers={"Content-Type": "application/json"}
            )
        return response

    async def deal(self) -> int:
        """Send every fetched task to the chunk API, one request at a time.

        Returns:
            The number of tasks fetched (0 when there is nothing to process).
        """
        tasks = await self.get_tasks()
        # An empty result needs no progress bar; a None result (possible if
        # the client reports failure that way -- TODO confirm) would otherwise
        # make tqdm()/len() raise TypeError.
        if not tasks:
            return 0
        for task in tqdm(tasks, desc="rechunk each doc"):
            await self.rechunk(task)
        return len(tasks)
|