luojunhui 3 veckor sedan
förälder
incheckning
d1b0fdd0b7

+ 3 - 1
applications/async_task/chunk_task.py

@@ -27,7 +27,9 @@ class ChunkEmbeddingTask(TopicAwareChunker):
         self.contents_processor = Contents(self.mysql_pool)
         self.content_chunk_processor = ContentChunks(self.mysql_pool)
 
-    async def process_content(self, doc_id: str, text: str, text_type: int) -> List[Chunk]:
+    async def process_content(
+        self, doc_id: str, text: str, text_type: int
+    ) -> List[Chunk]:
         flag = await self.contents_processor.insert_content(doc_id, text, text_type)
         if not flag:
             return []

+ 1 - 1
applications/config/__init__.py

@@ -21,5 +21,5 @@ __all__ = [
     "ChunkerConfig",
     "MILVUS_CONFIG",
     "RAG_MYSQL_CONFIG",
-    "WEIGHT_MAP"
+    "WEIGHT_MAP",
 ]

+ 2 - 1
applications/config/base_chunk.py

@@ -1,6 +1,7 @@
 from typing import List, Dict, Any
 from dataclasses import dataclass, field, asdict
 
+
 @dataclass
 class Chunk:
     chunk_id: int
@@ -28,4 +29,4 @@ class ChunkerConfig:
     enable_adaptive_boundary: bool = True
     enable_kg: bool = True
     topic_purity_floor: float = 0.8
-    kg_topk: int = 3
+    kg_topk: int = 3

+ 3 - 4
applications/config/milvus_config.py

@@ -1,8 +1,7 @@
-
 MILVUS_CONFIG = {
     # "host": "c-981be0ee7225467b-internal.milvus.aliyuncs.com", # 内网
-    "host": "c-981be0ee7225467b.milvus.aliyuncs.com", # 公网
+    "host": "c-981be0ee7225467b.milvus.aliyuncs.com",  # 公网
     "user": "root",
     "password": "Piaoquan@2025",
-    "port": "19530"
-}
+    "port": "19530",
+}

+ 1 - 1
applications/config/mysql_config.py

@@ -7,4 +7,4 @@ RAG_MYSQL_CONFIG = {
     "charset": "utf8mb4",
     "minsize": 5,
     "maxsize": 20,
-}
+}

+ 1 - 1
applications/config/weight_config.py

@@ -4,4 +4,4 @@ WEIGHT_MAP = {
     "vector_text": 0.6,
     "question_text": 0.3,
     "summary_text": 0.6,
-}
+}

+ 1 - 1
applications/utils/chunks/llm_classifier.py

@@ -56,4 +56,4 @@ class LLMClassifier:
             keywords=response.get("keywords", []),
             questions=response.get("questions", []),
             entities=response.get("entities", []),
-        )
+        )

+ 5 - 1
applications/utils/chunks/topic_aware_chunking.py

@@ -136,7 +136,11 @@ class TopicAwareChunker(BoundaryDetector, SplitTextIntoSentences):
             tokens = num_tokens(text)
             chunk_id += 1
             chunk = Chunk(
-                doc_id=self.doc_id, chunk_id=chunk_id, text=text, tokens=tokens, text_type=text_type
+                doc_id=self.doc_id,
+                chunk_id=chunk_id,
+                text=text,
+                tokens=tokens,
+                text_type=text_type,
             )
             chunks.append(chunk)
             start = end + 1

+ 6 - 1
applications/utils/milvus/__init__.py

@@ -3,4 +3,9 @@ from .functions import async_insert_chunk, async_search_chunk
 from .search import MilvusSearch
 
 
-__all__ = ["milvus_collection", "async_insert_chunk", "async_search_chunk", "MilvusSearch"]
+__all__ = [
+    "milvus_collection",
+    "async_insert_chunk",
+    "async_search_chunk",
+    "MilvusSearch",
+]

+ 2 - 2
applications/utils/milvus/search.py

@@ -1,6 +1,7 @@
 import asyncio
 from typing import List, Optional, Dict, Any, Union
 
+
 class MilvusBase:
 
     output_fields = [
@@ -34,7 +35,7 @@ class MilvusBase:
                 **{
                     key: list(value) if key in special_keys else value
                     for key, value in (hit.get("entity", {}) or {}).items()
-                }
+                },
             }
             for hit in hits[0]
         ]
@@ -66,7 +67,6 @@ class MilvusSearch(MilvusBase):
         )
         return {"results": self.hits_to_json(response)}
 
-
     # 混合搜索(向量 + metadata)
     async def hybrid_search(
         self,

+ 2 - 2
applications/utils/mysql/mapper.py

@@ -52,7 +52,7 @@ class ContentChunks(BaseMySQLClient):
                 chunk.text,
                 chunk.tokens,
                 chunk.topic_purity,
-                chunk.text_type
+                chunk.text_type,
             ),
         )
 
@@ -97,6 +97,6 @@ class ContentChunks(BaseMySQLClient):
                 json.dumps(chunk.entities),
                 chunk.doc_id,
                 chunk.chunk_id,
-                ori_status
+                ori_status,
             ),
         )

+ 13 - 3
routes/buleprint.py

@@ -3,7 +3,12 @@ import uuid
 
 from quart import Blueprint, jsonify, request
 
-from applications.config import DEFAULT_MODEL, LOCAL_MODEL_CONFIG, ChunkerConfig, WEIGHT_MAP
+from applications.config import (
+    DEFAULT_MODEL,
+    LOCAL_MODEL_CONFIG,
+    ChunkerConfig,
+    WEIGHT_MAP,
+)
 from applications.api import get_basic_embedding
 from applications.async_task import ChunkEmbeddingTask
 from applications.utils.milvus import MilvusSearch
@@ -32,7 +37,9 @@ def server_routes(mysql_db, vector_db):
         if not text:
             return jsonify({"error": "error  text"})
         doc_id = f"doc-{uuid.uuid4()}"
-        chunk_task = ChunkEmbeddingTask(mysql_db, vector_db, cfg=ChunkerConfig(), doc_id=doc_id)
+        chunk_task = ChunkEmbeddingTask(
+            mysql_db, vector_db, cfg=ChunkerConfig(), doc_id=doc_id
+        )
         doc_id = await chunk_task.deal(body)
         return jsonify({"doc_id": doc_id})
 
@@ -48,7 +55,10 @@ def server_routes(mysql_db, vector_db):
         try:
             # 统一参数
             expr = body.get("expr")
-            search_params = body.get("search_params") or {"metric_type": "COSINE", "params": {"ef": 64}}
+            search_params = body.get("search_params") or {
+                "metric_type": "COSINE",
+                "params": {"ef": 64},
+            }
             limit = body.get("limit", 50)
             query = body.get("query")
 

+ 3 - 1
vector_app.py

@@ -14,6 +14,7 @@ MODEL_PATH = LOCAL_MODEL_CONFIG[DEFAULT_MODEL]
 app_route = server_routes(mysql_manager, milvus_collection)
 app.register_blueprint(app_route)
 
+
 @app.before_serving
 async def startup():
     print("Starting application...")
@@ -24,7 +25,8 @@ async def startup():
     jieba.initialize()
     print("Jieba dictionary loaded successfully")
 
+
 @app.after_serving
 async def shutdown():
     print("Shutting down application...")
-    await mysql_manager.close_pools()
+    await mysql_manager.close_pools()