|
@@ -0,0 +1,31 @@
|
|
|
+from tqdm.asyncio import tqdm
|
|
|
+
|
|
|
+from applications.utils.http import AsyncHttpClient
|
|
|
+
|
|
|
+
|
|
|
+class AutoRechunkTask:
|
|
|
+ def __init__(self, mysql_client):
|
|
|
+ self.mysql_client = mysql_client
|
|
|
+
|
|
|
+ async def get_tasks(self):
|
|
|
+ query = """
|
|
|
+ SELECT doc_id, title, text, text_type, dataset_id
|
|
|
+ FROM contents
|
|
|
+ WHERE status = %s;
|
|
|
+ """
|
|
|
+ tasks = await self.mysql_client.async_fetch(query, params=(0,))
|
|
|
+ return tasks
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ async def rechunk(task):
|
|
|
+ url = "http://192.168.100.31:8001/api/chunk"
|
|
|
+ async with AsyncHttpClient() as http_client:
|
|
|
+ response = await http_client.post(url, json=task, headers={"Content-Type": "application/json"})
|
|
|
+ return response
|
|
|
+
|
|
|
+ async def deal(self):
|
|
|
+ tasks = await self.get_tasks()
|
|
|
+ for task in tqdm(tasks, desc="rechunk each doc"):
|
|
|
+ await self.rechunk(task)
|
|
|
+
|
|
|
+ return len(tasks)
|