luojunhui 3 days ago
parent
commit
f589a80c2f

+ 0 - 0
applications/__init__.py


+ 6 - 0
applications/config/__init__.py

@@ -0,0 +1,6 @@
from .model_config import MODEL_CONFIG, DEFAULT_MODEL

# Public API of the config package.
__all__ = ["MODEL_CONFIG", "DEFAULT_MODEL"]

+ 10 - 0
applications/config/model_config.py

@@ -0,0 +1,10 @@
# Embedding-service registry: model name -> vLLM endpoint URL and output
# vector dimension. Hostnames are the docker-compose service names; all
# services listen on container port 8000 (see docker-compose.yml).
#
# NOTE(review): dimensions corrected to the published Qwen3-Embedding sizes
# (0.6B -> 1024, 4B -> 2560, 8B -> 4096); the original listed 1536 for all
# three, which would break any vector-DB collection sized from this config.
MODEL_CONFIG = {
    "Qwen/Qwen3-Embedding-0.6B": {"url": "http://vllm-0.6b:8000/v1/embeddings", "dim": 1024},
    "Qwen/Qwen3-Embedding-4B": {"url": "http://vllm-4b:8000/v1/embeddings", "dim": 2560},
    "Qwen/Qwen3-Embedding-8B": {"url": "http://vllm-8b:8000/v1/embeddings", "dim": 4096},
}

# Model used when a request does not name one explicitly.
DEFAULT_MODEL = "Qwen/Qwen3-Embedding-0.6B"


__all__ = ["MODEL_CONFIG", "DEFAULT_MODEL"]

+ 5 - 0
applications/embedding/__init__.py

@@ -0,0 +1,5 @@
from .basic import get_basic_embedding

# Public API of the embedding package.
__all__ = ["get_basic_embedding"]

+ 23 - 0
applications/embedding/basic.py

@@ -0,0 +1,23 @@
+from applications.config import MODEL_CONFIG
+from applications.utils import AsyncHttpClient
+
+
async def get_basic_embedding(text: str, model: str) -> list[float]:
    """Embed *text* into a vector via the configured vLLM endpoint.

    :param text: the text to embed
    :param model: model name; must be a key of ``MODEL_CONFIG``
    :return: the embedding vector for the (single) input text
    :raises KeyError: if *model* is not present in ``MODEL_CONFIG``
    """
    # Fail early with a clear message rather than a bare KeyError below.
    if model not in MODEL_CONFIG:
        raise KeyError(f"unknown embedding model: {model!r}")
    cfg = MODEL_CONFIG[model]
    async with AsyncHttpClient(timeout=20) as client:
        # vLLM serves the OpenAI-compatible embeddings schema:
        # {"data": [{"embedding": [...]}], ...}
        response = await client.post(
            url=cfg["url"],
            json={"input": text, "model": model},
            headers={"Content-Type": "application/json"},
        )
        return response["data"][0]["embedding"]


__all__ = ["get_basic_embedding"]

+ 1 - 0
applications/utils/__init__.py

@@ -0,0 +1 @@
+from .async_http_client import AsyncHttpClient

+ 99 - 0
applications/utils/async_http_client.py

@@ -0,0 +1,99 @@
import logging
from typing import Any, Dict, Optional, Union

import aiohttp
+
+
logger = logging.getLogger(__name__)


class AsyncHttpClient:
    """Minimal async HTTP client wrapping an ``aiohttp.ClientSession``.

    Must be used as an async context manager so the session is opened
    and closed deterministically::

        async with AsyncHttpClient(timeout=20) as client:
            data = await client.get(url)
    """

    def __init__(
        self,
        timeout: int = 10,
        max_connections: int = 100,
        default_headers: Optional[Dict[str, str]] = None,
    ):
        """
        :param timeout: total request timeout in seconds
        :param max_connections: maximum size of the connection pool
        :param default_headers: headers sent with every request
        """
        self.timeout = aiohttp.ClientTimeout(total=timeout)
        self.connector = aiohttp.TCPConnector(limit=max_connections)
        self.default_headers = default_headers or {}
        self.session: Optional[aiohttp.ClientSession] = None

    async def __aenter__(self):
        self.session = aiohttp.ClientSession(
            connector=self.connector, timeout=self.timeout, headers=self.default_headers
        )
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        # Guard: __aenter__ may never have run (or may have failed).
        if self.session is not None:
            await self.session.close()
            self.session = None

    async def request(
        self,
        method: str,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        data: Optional[Union[Dict[str, Any], str, bytes]] = None,
        json: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Union[Dict[str, Any], str]:
        """Perform one request and return the decoded response body.

        JSON responses (by Content-Type) are parsed; anything else is
        returned as text.

        :raises RuntimeError: if used outside the ``async with`` block
        :raises aiohttp.ClientResponseError: on a 4xx/5xx status
        :raises aiohttp.ClientError: on network-level failures
        """
        # Clear error instead of the opaque AttributeError on a None session.
        if self.session is None:
            raise RuntimeError(
                "AsyncHttpClient must be used as an async context manager"
            )
        request_headers = {**self.default_headers, **(headers or {})}

        try:
            async with self.session.request(
                method,
                url,
                params=params,
                data=data,
                json=json,
                headers=request_headers,
            ) as response:
                response.raise_for_status()
                content_type = response.headers.get("Content-Type", "")

                if "application/json" in content_type:
                    return await response.json()
                return await response.text()

        except aiohttp.ClientResponseError as e:
            # Log (not print) so callers control handlers/verbosity; re-raise
            # so the caller still sees the failure.
            logger.warning("HTTP error: %s %s", e.status, e.message)
            raise
        except aiohttp.ClientError as e:
            logger.warning("Network error: %s", e)
            raise

    async def get(
        self,
        url: str,
        params: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Union[Dict[str, Any], str]:
        """GET request."""
        return await self.request("GET", url, params=params, headers=headers)

    async def post(
        self,
        url: str,
        data: Optional[Union[Dict[str, Any], str, bytes]] = None,
        json: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Union[Dict[str, Any], str]:
        """POST request."""
        return await self.request("POST", url, data=data, json=json, headers=headers)

    async def put(
        self,
        url: str,
        data: Optional[Union[Dict[str, Any], str, bytes]] = None,
        json: Optional[Dict[str, Any]] = None,
        headers: Optional[Dict[str, str]] = None,
    ) -> Union[Dict[str, Any], str]:
        """PUT request — typically used to update a resource with form or
        JSON data."""
        return await self.request("PUT", url, data=data, json=json, headers=headers)

+ 5 - 0
applications/utils/response.py

@@ -0,0 +1,5 @@
class BaseResponse:
    """Response-helper skeleton (stub — no behavior implemented yet)."""

    @staticmethod
    def negative_response():
        # TODO(review): placeholder — presumably intended to build an error
        # response payload; body is empty, confirm intended shape before use.
        pass

+ 54 - 0
docker-compose.yml

@@ -0,0 +1,54 @@
version: "3.8"
services:
  # vLLM - Qwen3 0.6B (OpenAI-compatible API on container port 8000;
  # reachable as http://vllm-0.6b:8000 inside the compose network).
  vllm-0.6b:
    image: vllm/vllm-openai:latest
    container_name: vllm-qwen3-0.6b
    ports:
      - "8100:8000"
    # NOTE(review): the vLLM OpenAI server flag is --port, not --api-port
    # (the original --api-port is rejected at startup).
    command: >
      --model Qwen/Qwen3-Embedding-0.6B
      --dtype float16
      --port 8000
    volumes:
      - ./models:/root/.cache/huggingface

#  # vLLM - Qwen3 4B
#  vllm-4b:
#    image: vllm/vllm-openai:latest
#    container_name: vllm-qwen3-4b
#    ports:
#      - "8200:8000"
#    command: >
#      --model Qwen/Qwen3-Embedding-4B
#      --dtype float16
#      --port 8000
#    volumes:
#      - ./models:/root/.cache/huggingface
#
#  # vLLM - Qwen3 8B
#  vllm-8b:
#    image: vllm/vllm-openai:latest
#    container_name: vllm-qwen3-8b
#    ports:
#      - "8300:8000"
#    command: >
#      --model Qwen/Qwen3-Embedding-8B
#      --dtype float16
#      --port 8000
#    volumes:
#      - ./models:/root/.cache/huggingface

  # Milvus vector database (standalone, embedded dependencies).
  milvus:
    # NOTE(review): Milvus image tags carry a "v" prefix ("2.4.0" does not
    # exist on Docker Hub).
    image: milvusdb/milvus:v2.4.0
    container_name: milvus
    # NOTE(review): standalone Milvus must be started explicitly; the image
    # does not default to it — confirm against the official compose file.
    command: ["milvus", "run", "standalone"]
    ports:
      - "19530:19530"
      - "9091:9091"
    environment:
      - ETCD_USE_EMBED=true
      - MINIO_USE_EMBED=true
      - PULSAR_USE_EMBED=true
    volumes:
      - ./milvus_data:/var/lib/milvus

+ 0 - 0
requirements.txt


+ 5 - 0
routes/__init__.py

@@ -0,0 +1,5 @@
# NOTE: module filename "buleprint" (sic) must match the file on disk.
from .buleprint import server_routes

# Public API of the routes package.
__all__ = ["server_routes"]

+ 29 - 0
routes/buleprint.py

@@ -0,0 +1,29 @@
+from quart import Blueprint, jsonify, request
+
+from applications.config import DEFAULT_MODEL, MODEL_CONFIG
+from applications.embedding import get_basic_embedding
+
+
server_bp = Blueprint("api", __name__, url_prefix="/api")


def server_routes(vector_db):
    """Attach the API handlers and return the configured blueprint.

    :param vector_db: vector-database handle (reserved for /search; not
        used yet)
    :return: ``server_bp`` with routes registered
    """

    @server_bp.route("/embed", methods=["POST"])
    async def embed():
        body = await request.get_json()
        text = body.get("text")
        model_name = body.get("model", DEFAULT_MODEL)
        # Reject bad input with HTTP 400 instead of a 200-with-error body.
        if not text:
            return jsonify({"error": "text is required"}), 400
        if model_name not in MODEL_CONFIG:
            return jsonify({"error": f"unknown model: {model_name}"}), 400

        embedding = await get_basic_embedding(text, model_name)
        return jsonify({"embedding": embedding})

    @server_bp.route("/search", methods=["POST"])
    async def search():
        # TODO(review): not implemented yet; return a well-formed 501 so the
        # framework does not error on a None response body.
        return jsonify({"error": "not implemented"}), 501

    # BUG FIX: the original never returned the blueprint, so
    # register_blueprint(server_routes(...)) in vector_app.py received None
    # and crashed at startup.
    return server_bp

+ 14 - 0
vector_app.py

@@ -0,0 +1,14 @@
from quart import Quart
from quart_cors import cors
from pymilvus import connections

from routes import server_routes

app = Quart(__name__)
# BUG FIX: quart_cors was imported but never applied; wrap the app so CORS
# headers are actually emitted.
app = cors(app)

# Connect to the Milvus vector database (service name from docker-compose).
connections.connect("default", host="milvus", port="19530")

# Register API routes.
app_route = server_routes(connections)
app.register_blueprint(app_route)

if __name__ == "__main__":
    # Dev entry point; in production run under an ASGI server instead.
    app.run(host="0.0.0.0")