auto_rechunk_task.py 937 B

12345678910111213141516171819202122232425262728293031
  1. from tqdm.asyncio import tqdm
  2. from applications.utils.http import AsyncHttpClient
  class AutoRechunkTask:
      """Re-submit stored documents to the chunking HTTP service.

      The task reads rows from the ``contents`` table whose ``status`` column
      equals 0 and POSTs each row, one after another, to the chunk endpoint.
      NOTE(review): status 0 presumably means "not yet chunked" -- confirm
      against the schema owner.
      """

      # Default chunk endpoint; kept as a class attribute so it can be changed
      # in one place instead of being buried inside ``rechunk``.
      CHUNK_API_URL = "http://192.168.100.31:8001/api/chunk"

      def __init__(self, mysql_client):
          """Store the database client used to look up pending documents.

          Args:
              mysql_client: Object exposing an awaitable
                  ``async_fetch(query, params=...)`` method.
          """
          self.mysql_client = mysql_client

      async def get_tasks(self):
          """Fetch every ``contents`` row whose ``status`` is 0.

          Returns:
              Whatever ``mysql_client.async_fetch`` returns for the query; each
              row carries ``doc_id``, ``title``, ``text``, ``text_type`` and
              ``dataset_id``.
          """
          query = """
              SELECT doc_id, title, text, text_type, dataset_id
              FROM contents
              WHERE status = %s;
          """
          # The status value is bound as a parameter rather than interpolated.
          return await self.mysql_client.async_fetch(query, params=(0,))

      @staticmethod
      async def rechunk(task, url=None):
          """POST a single task to the chunk service and return the response.

          Args:
              task: JSON-serialisable payload describing one document
                  (presumably a row from ``get_tasks`` -- verify against caller).
              url: Optional endpoint override; defaults to ``CHUNK_API_URL``.

          Returns:
              The value returned by ``AsyncHttpClient.post``.
          """
          if url is None:
              url = AutoRechunkTask.CHUNK_API_URL
          headers = {"Content-Type": "application/json"}
          # A fresh client per call keeps this static method self-contained.
          async with AsyncHttpClient() as http_client:
              return await http_client.post(url, json=task, headers=headers)

      async def deal(self):
          """Rechunk every pending document sequentially.

          Any exception raised while posting a task propagates to the caller
          and stops the run.

          Returns:
              The number of tasks fetched (0 when the query yields nothing).
          """
          # Treat a ``None``/empty fetch result as "no work" so that neither
          # the progress bar nor ``len`` fails on a missing result set.
          tasks = await self.get_tasks() or []
          for task in tqdm(tasks, desc="rechunk each doc"):
              await self.rechunk(task)
          return len(tasks)