Sfoglia il codice sorgente

删除功能测试

luojunhui 2 settimane fa
parent
commit
071f3137c6

+ 3 - 9
applications/async_task/delete_task.py

@@ -35,18 +35,14 @@ class DeleteTask:
             handler=self.es_client.async_delete,
             description="delete IDs From ElasticSearch",
             unit="chunk",
-            max_concurrency=10
+            max_concurrency=10,
         )
 
     async def delete_by_query(self, filters: Dict):
         must_clauses = []
         for field, value in filters.items():
             must_clauses.append({"term": {field: value}})
-        query = {
-            "query": {
-                "bool": {"must": must_clauses}
-            }
-        }
+        query = {"query": {"bool": {"must": must_clauses}}}
         await self.es_client.async_delete_by_query(query=query)
 
     async def delete_ids_from_milvus(self, ids: List):
@@ -64,7 +60,6 @@ class DeleteTask:
         # step4, delete from es by query
         await self.delete_by_query(filters)
 
-
     async def delete_chunk(self, params):
         doc_id = params["doc_id"]
         chunk_id = params["chunk_id"]
@@ -107,7 +102,6 @@ class DeleteTask:
             print(f"delete dataset failed: {e}")
             return {"dataset_id": dataset_id, "status": "failed"}
 
-
     async def deal(self, level, params):
         """
         :param level: 删除级别
@@ -122,4 +116,4 @@ class DeleteTask:
             case "chunk":
                 return await self.delete_chunk(params)
             case _:
-                return {"error": "error level"}
+                return {"error": "error level"}

+ 0 - 1
applications/utils/chunks/kg_classifier.py

@@ -9,7 +9,6 @@ from applications.api import get_basic_embedding
 
 
 class KGClassifier:
-
     def __init__(self, kg_spec: Dict[str, Any]):
         self.root = kg_spec["root"]
         self._embed_cache: Dict[str, np.ndarray] = {}

+ 0 - 1
applications/utils/chunks/topic_aware_chunking.py

@@ -88,7 +88,6 @@ class BoundaryDetector:
 
 
 class TopicAwareChunker(BoundaryDetector, SplitTextIntoSentences):
-
     INIT_STATUS = 0
     PROCESSING_STATUS = 1
     FINISHED_STATUS = 2

+ 3 - 2
applications/utils/elastic_search/client.py

@@ -3,7 +3,6 @@ from elasticsearch.helpers import async_bulk
 
 
 class AsyncElasticSearchClient:
-
     def __init__(self, index_name, hosts, password):
         self.es = AsyncElasticsearch(hosts=hosts, basic_auth=("elastic", password))
         self.index_name = index_name
@@ -40,7 +39,9 @@ class AsyncElasticSearchClient:
         await self.es.delete(index=self.index_name, id=es_id)
 
     async def async_delete_by_query(self, query):
-        await self.es.delete_by_query(index=self.index_name, body=query, conflicts="proceed", refresh=True)
+        await self.es.delete_by_query(
+            index=self.index_name, body=query, conflicts="proceed", refresh=True
+        )
 
     async def bulk_insert(self, docs):
         success, errors = await async_bulk(self.es, docs, request_timeout=10)

+ 3 - 2
applications/utils/milvus/functions.py

@@ -14,7 +14,9 @@ async def async_insert_chunk(collection: pymilvus.Collection, data: Dict) -> Lis
     return result.primary_keys
 
 
-async def async_delete_chunk(collection: pymilvus.Collection, ids: List[int]) -> List[int]:
+async def async_delete_chunk(
+    collection: pymilvus.Collection, ids: List[int]
+) -> List[int]:
     """
     Delete entities by ids from a Milvus collection asynchronously.
 
@@ -32,4 +34,3 @@ async def async_delete_chunk(collection: pymilvus.Collection, ids: List[int]) ->
         return ids
     else:
         return ids[:success_count]
-

+ 0 - 2
applications/utils/milvus/search.py

@@ -3,7 +3,6 @@ from typing import List, Optional, Dict, Any, Union
 
 
 class MilvusBase:
-
     output_fields = [
         "id",
         "doc_id",
@@ -33,7 +32,6 @@ class MilvusBase:
 
 
 class MilvusSearch(MilvusBase):
-
     # 通过向量粗搜索
     async def base_vector_search(
         self,

+ 2 - 5
applications/utils/mysql/mapper.py

@@ -10,12 +10,11 @@ class TaskConst:
 
 
 class BaseMySQLClient:
-
     def __init__(self, pool):
         self.pool = pool
 
-class Dataset(BaseMySQLClient):
 
+class Dataset(BaseMySQLClient):
     async def update_dataset_status(self, dataset_id, ori_status, new_status):
         query = """
             UPDATE dataset set status = %s where id = %s and status = %s;
@@ -27,7 +26,6 @@ class Dataset(BaseMySQLClient):
 
 
 class Contents(BaseMySQLClient):
-
     async def insert_content(self, doc_id, text, text_type, title, dataset_id):
         query = """
             INSERT IGNORE INTO contents
@@ -76,7 +74,6 @@ class Contents(BaseMySQLClient):
 
 
 class ContentChunks(BaseMySQLClient):
-
     async def insert_chunk(self, chunk: Chunk) -> int:
         query = """
             INSERT IGNORE INTO content_chunks
@@ -175,4 +172,4 @@ class ContentChunks(BaseMySQLClient):
         """
         return await self.pool.async_save(
             query=query, params=(new_status, dataset_id, ori_status)
-        )
+        )

+ 0 - 1
applications/utils/nlp/split_text_into_sentences.py

@@ -6,7 +6,6 @@ from typing import List
 
 
 class SplitTextIntoSentences:
-
     @staticmethod
     def nltk_sent_tokenize(text: str) -> List[str]:
         """especially for English"""

+ 0 - 1
applications/utils/response/base_response.py

@@ -1,5 +1,4 @@
 class BaseResponse:
-
     @staticmethod
     def negative_response():
         pass

+ 2 - 0
routes/buleprint.py

@@ -42,6 +42,7 @@ async def img_embed():
     embedding = await get_img_embedding(url_list)
     return jsonify(embedding)
 
+
 @server_bp.route("/delete", methods=["POST"])
 async def delete():
     body = await request.get_json()
@@ -54,6 +55,7 @@ async def delete():
     response = await delete_task.deal(level, params)
     return jsonify(response)
 
+
 @server_bp.route("/chunk", methods=["POST"])
 async def chunk():
     body = await request.get_json()