Просмотр исходного кода

refactor(knowhub): rename serialize_milvus_result to to_serializable

The function has nothing to do with Milvus — it is a generic serializer
that converts arbitrary Python objects into JSON-safe native types. It
handles primitives, dicts, lists/tuples, other iterables, objects with
to_dict(), and falls back to walking __dict__. The name was a
historical artifact from when the knowledge store actually used Milvus
(now removed from the dependency set entirely).

Rename to to_serializable across all 12 call sites in knowhub/server.py
plus the definition. Also update the docstring to reflect the real
purpose ("通用序列化工具:把任意 Python 对象转换为 JSON 可序列化的原生类型").
Talegorithm 20 часов назад
Родитель
Коммит
e940602280
1 изменённый файл: 14 добавлено и 14 удалено
  1. 14 14
      knowhub/server.py

+ 14 - 14
knowhub/server.py

@@ -145,24 +145,24 @@ def decrypt_content(resource_id: str, encrypted_text: str, provided_key: Optiona
         return "[ENCRYPTED]"
 
 
-def serialize_milvus_result(data):
-    """将 Milvus 返回的数据转换为可序列化的字典"""
+def to_serializable(data):
+    """通用序列化工具:把任意 Python 对象转换为 JSON 可序列化的原生类型"""
     # 基本类型直接返回
     if data is None or isinstance(data, (str, int, float, bool)):
         return data
 
     # 字典类型递归处理
     if isinstance(data, dict):
-        return {k: serialize_milvus_result(v) for k, v in data.items()}
+        return {k: to_serializable(v) for k, v in data.items()}
 
     # 列表/元组类型递归处理
     if isinstance(data, (list, tuple)):
-        return [serialize_milvus_result(item) for item in data]
+        return [to_serializable(item) for item in data]
 
     # 尝试转换为字典(对于有 to_dict 方法的对象)
     if hasattr(data, 'to_dict') and callable(getattr(data, 'to_dict')):
         try:
-            return serialize_milvus_result(data.to_dict())
+            return to_serializable(data.to_dict())
         except:
             pass
 
@@ -172,7 +172,7 @@ def serialize_milvus_result(data):
             # 强制转换为列表并递归处理
             result = []
             for item in data:
-                result.append(serialize_milvus_result(item))
+                result.append(to_serializable(item))
             return result
         except:
             pass
@@ -180,7 +180,7 @@ def serialize_milvus_result(data):
     # 尝试获取对象的属性字典
     if hasattr(data, '__dict__'):
         try:
-            return serialize_milvus_result(vars(data))
+            return to_serializable(vars(data))
         except:
             pass
 
@@ -1136,7 +1136,7 @@ async def search_knowledge_api(
             return {"results": [], "count": 0, "reranked": False}
 
         # 转换为可序列化的格式
-        serialized_candidates = [serialize_milvus_result(c) for c in candidates]
+        serialized_candidates = [to_serializable(c) for c in candidates]
 
         # 为了保证搜索的极致速度,直接返回向量召回的 top-k(跳过缓慢的 LLM 精排)
         return {"results": serialized_candidates[:top_k], "count": len(serialized_candidates[:top_k]), "reranked": False}
@@ -1279,7 +1279,7 @@ def list_knowledge(
         results = pg_store.query(filter_expr, limit=max_limit)
 
         # 转换为可序列化的格式
-        serialized_results = [serialize_milvus_result(r) for r in results]
+        serialized_results = [to_serializable(r) for r in results]
 
         # 按 created_at 降序排序(最新的在前)
         serialized_results.sort(key=lambda x: x.get('created_at', 0), reverse=True)
@@ -1316,7 +1316,7 @@ def get_all_tags():
         all_tags = set()
         for item in results:
             # 转换为标准字典
-            serialized_item = serialize_milvus_result(item)
+            serialized_item = to_serializable(item)
             tags_dict = serialized_item.get("tags", {})
             if isinstance(tags_dict, dict):
                 for key in tags_dict.keys():
@@ -1337,7 +1337,7 @@ def get_pending_knowledge(limit: int = Query(default=50, ge=1, le=200)):
             'status == "pending" or status == "processing" or status == "dedup_passed" or status == "analyzing"',
             limit=limit
         )
-        serialized = [serialize_milvus_result(r) for r in pending]
+        serialized = [to_serializable(r) for r in pending]
         return {"results": serialized, "count": len(serialized)}
     except Exception as e:
         print(f"[Pending] 错误: {e}")
@@ -1377,7 +1377,7 @@ def get_knowledge_status(knowledge_id: str):
         result = pg_store.get_by_id(knowledge_id)
         if not result:
             raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
-        serialized = serialize_milvus_result(result)
+        serialized = to_serializable(result)
         return {
             "id": knowledge_id,
             "status": serialized.get("status", "approved"),
@@ -1400,7 +1400,7 @@ def get_knowledge(knowledge_id: str):
         if not result:
             raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
 
-        return serialize_milvus_result(result)
+        return to_serializable(result)
 
     except HTTPException:
         raise
@@ -1743,7 +1743,7 @@ async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
         # 获取所有知识
         all_knowledge = pg_store.query('id != ""', limit=10000)
         # 转换为可序列化的格式
-        all_knowledge = [serialize_milvus_result(item) for item in all_knowledge]
+        all_knowledge = [to_serializable(item) for item in all_knowledge]
 
         if len(all_knowledge) < 2:
             return {"status": "ok", "message": f"知识库仅有 {len(all_knowledge)} 条,无需瘦身"}