|
@@ -8,8 +8,10 @@ class TaskConst:
|
|
FINISHED_STATUS = 2
|
|
FINISHED_STATUS = 2
|
|
FAILED_STATUS = 3
|
|
FAILED_STATUS = 3
|
|
|
|
|
|
|
|
+ CHUNK_USEFUL_STATUS = 1
|
|
|
|
|
|
-class BaseMySQLClient:
|
|
|
|
|
|
+
|
|
|
|
+class BaseMySQLClient(TaskConst):
|
|
def __init__(self, pool):
|
|
def __init__(self, pool):
|
|
self.pool = pool
|
|
self.pool = pool
|
|
|
|
|
|
@@ -77,8 +79,8 @@ class ContentChunks(BaseMySQLClient):
|
|
async def insert_chunk(self, chunk: Chunk) -> int:
|
|
async def insert_chunk(self, chunk: Chunk) -> int:
|
|
query = """
|
|
query = """
|
|
INSERT IGNORE INTO content_chunks
|
|
INSERT IGNORE INTO content_chunks
|
|
- (chunk_id, doc_id, text, tokens, topic_purity, text_type, dataset_id)
|
|
|
|
- VALUES (%s, %s, %s, %s, %s, %s, %s);
|
|
|
|
|
|
+ (chunk_id, doc_id, text, tokens, topic_purity, text_type, dataset_id, status)
|
|
|
|
+ VALUES (%s, %s, %s, %s, %s, %s, %s, %s);
|
|
"""
|
|
"""
|
|
return await self.pool.async_save(
|
|
return await self.pool.async_save(
|
|
query=query,
|
|
query=query,
|
|
@@ -90,6 +92,7 @@ class ContentChunks(BaseMySQLClient):
|
|
chunk.topic_purity,
|
|
chunk.topic_purity,
|
|
chunk.text_type,
|
|
chunk.text_type,
|
|
chunk.dataset_id,
|
|
chunk.dataset_id,
|
|
|
|
+ chunk.status,
|
|
),
|
|
),
|
|
)
|
|
)
|
|
|
|
|
|
@@ -97,10 +100,10 @@ class ContentChunks(BaseMySQLClient):
|
|
query = """
|
|
query = """
|
|
UPDATE content_chunks
|
|
UPDATE content_chunks
|
|
SET chunk_status = %s
|
|
SET chunk_status = %s
|
|
- WHERE doc_id = %s AND chunk_id = %s AND chunk_status = %s;
|
|
|
|
|
|
+ WHERE doc_id = %s AND chunk_id = %s AND chunk_status = %s and status = %s;
|
|
"""
|
|
"""
|
|
return await self.pool.async_save(
|
|
return await self.pool.async_save(
|
|
- query=query, params=(new_status, doc_id, chunk_id, ori_status)
|
|
|
|
|
|
+ query=query, params=(new_status, doc_id, chunk_id, ori_status, self.CHUNK_USEFUL_STATUS)
|
|
)
|
|
)
|
|
|
|
|
|
async def update_embedding_status(self, doc_id, chunk_id, ori_status, new_status):
|
|
async def update_embedding_status(self, doc_id, chunk_id, ori_status, new_status):
|