|
@@ -2,7 +2,7 @@
|
|
|
KnowHub Server
|
|
KnowHub Server
|
|
|
|
|
|
|
|
Agent 工具使用经验的共享平台。
|
|
Agent 工具使用经验的共享平台。
|
|
|
-FastAPI + SQLite,单文件部署。
|
|
|
|
|
|
|
+FastAPI + Milvus Lite(知识)+ SQLite(资源),单文件部署。
|
|
|
"""
|
|
"""
|
|
|
|
|
|
|
|
import os
|
|
import os
|
|
@@ -11,6 +11,8 @@ import json
|
|
|
import sqlite3
|
|
import sqlite3
|
|
|
import asyncio
|
|
import asyncio
|
|
|
import base64
|
|
import base64
|
|
|
|
|
+import time
|
|
|
|
|
+import uuid
|
|
|
from contextlib import asynccontextmanager
|
|
from contextlib import asynccontextmanager
|
|
|
from datetime import datetime, timezone
|
|
from datetime import datetime, timezone
|
|
|
from typing import Optional
|
|
from typing import Optional
|
|
@@ -31,6 +33,10 @@ load_dotenv(Path(__file__).parent.parent / ".env")
|
|
|
|
|
|
|
|
from agent.llm.openrouter import openrouter_llm_call
|
|
from agent.llm.openrouter import openrouter_llm_call
|
|
|
|
|
|
|
|
|
|
+# 导入向量存储和 embedding
|
|
|
|
|
+from knowhub.vector_store import MilvusStore
|
|
|
|
|
+from knowhub.embeddings import get_embedding, get_embeddings_batch
|
|
|
|
|
+
|
|
|
BRAND_NAME = os.getenv("BRAND_NAME", "KnowHub")
|
|
BRAND_NAME = os.getenv("BRAND_NAME", "KnowHub")
|
|
|
BRAND_API_ENV = os.getenv("BRAND_API_ENV", "KNOWHUB_API")
|
|
BRAND_API_ENV = os.getenv("BRAND_API_ENV", "KNOWHUB_API")
|
|
|
BRAND_DB = os.getenv("BRAND_DB", "knowhub.db")
|
|
BRAND_DB = os.getenv("BRAND_DB", "knowhub.db")
|
|
@@ -45,6 +51,10 @@ if ORG_KEYS_RAW:
|
|
|
ORG_KEYS[org.strip()] = key_b64.strip()
|
|
ORG_KEYS[org.strip()] = key_b64.strip()
|
|
|
|
|
|
|
|
DB_PATH = Path(__file__).parent / BRAND_DB
|
|
DB_PATH = Path(__file__).parent / BRAND_DB
|
|
|
|
|
+MILVUS_DATA_DIR = Path(__file__).parent / "milvus_data"
|
|
|
|
|
+
|
|
|
|
|
+# 全局 Milvus 存储实例
|
|
|
|
|
+milvus_store: Optional[MilvusStore] = None
|
|
|
|
|
|
|
|
# --- 数据库 ---
|
|
# --- 数据库 ---
|
|
|
|
|
|
|
@@ -127,6 +137,7 @@ def decrypt_content(resource_id: str, encrypted_text: str, provided_key: Optiona
|
|
|
|
|
|
|
|
|
|
|
|
|
def init_db():
|
|
def init_db():
|
|
|
|
|
+ """初始化 SQLite(仅用于 resources)"""
|
|
|
conn = get_db()
|
|
conn = get_db()
|
|
|
conn.execute("""
|
|
conn.execute("""
|
|
|
CREATE TABLE IF NOT EXISTS experiences (
|
|
CREATE TABLE IF NOT EXISTS experiences (
|
|
@@ -160,28 +171,6 @@ def init_db():
|
|
|
)
|
|
)
|
|
|
""")
|
|
""")
|
|
|
|
|
|
|
|
- conn.execute("""
|
|
|
|
|
- CREATE TABLE IF NOT EXISTS knowledge (
|
|
|
|
|
- id TEXT PRIMARY KEY,
|
|
|
|
|
- message_id TEXT DEFAULT '',
|
|
|
|
|
- types TEXT NOT NULL, -- JSON array: ["strategy", "tool"]
|
|
|
|
|
- task TEXT NOT NULL,
|
|
|
|
|
- tags TEXT DEFAULT '{}', -- JSON object: {"category": "...", "domain": "..."}
|
|
|
|
|
- scopes TEXT DEFAULT '["org:cybertogether"]', -- JSON array
|
|
|
|
|
- owner TEXT DEFAULT '',
|
|
|
|
|
- content TEXT NOT NULL,
|
|
|
|
|
- resource_ids TEXT DEFAULT '[]', -- JSON array: ["code/selenium/login", "credentials/website"]
|
|
|
|
|
- source TEXT DEFAULT '{}', -- JSON object: {name, category, urls, agent_id, submitted_by, timestamp}
|
|
|
|
|
- eval TEXT DEFAULT '{}', -- JSON object: {score, helpful, harmful, confidence, histories}
|
|
|
|
|
- created_at TEXT NOT NULL,
|
|
|
|
|
- updated_at TEXT DEFAULT ''
|
|
|
|
|
- )
|
|
|
|
|
- """)
|
|
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_types ON knowledge(types)")
|
|
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_task ON knowledge(task)")
|
|
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_owner ON knowledge(owner)")
|
|
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_scopes ON knowledge(scopes)")
|
|
|
|
|
-
|
|
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
conn.close()
|
|
conn.close()
|
|
|
|
|
|
|
@@ -294,9 +283,18 @@ class ResourceOut(BaseModel):
|
|
|
|
|
|
|
|
@asynccontextmanager
|
|
@asynccontextmanager
|
|
|
async def lifespan(app: FastAPI):
|
|
async def lifespan(app: FastAPI):
|
|
|
|
|
+ global milvus_store
|
|
|
|
|
+
|
|
|
|
|
+ # 初始化 SQLite(resources)
|
|
|
init_db()
|
|
init_db()
|
|
|
|
|
+
|
|
|
|
|
+ # 初始化 Milvus Lite(knowledge)
|
|
|
|
|
+ milvus_store = MilvusStore(data_dir=str(MILVUS_DATA_DIR))
|
|
|
|
|
+
|
|
|
yield
|
|
yield
|
|
|
|
|
|
|
|
|
|
+ # 清理(Milvus Lite 会自动处理)
|
|
|
|
|
+
|
|
|
|
|
|
|
|
app = FastAPI(title=BRAND_NAME, lifespan=lifespan)
|
|
app = FastAPI(title=BRAND_NAME, lifespan=lifespan)
|
|
|
|
|
|
|
@@ -498,181 +496,58 @@ def list_resources(
|
|
|
|
|
|
|
|
# ===== Knowledge API =====
|
|
# ===== Knowledge API =====
|
|
|
|
|
|
|
|
-# 两阶段检索逻辑
|
|
|
|
|
-async def _route_knowledge_by_llm(query_text: str, metadata_list: list[dict], k: int = 5) -> list[str]:
|
|
|
|
|
|
|
+async def _llm_rerank(query: str, candidates: list[dict], top_k: int) -> list[str]:
|
|
|
"""
|
|
"""
|
|
|
- 第一阶段:语义路由。
|
|
|
|
|
- 让 LLM 挑选出 2*k 个语义相关的 ID。
|
|
|
|
|
|
|
+ 使用 LLM 对候选知识进行精排
|
|
|
|
|
+
|
|
|
|
|
+ Args:
|
|
|
|
|
+ query: 查询文本
|
|
|
|
|
+ candidates: 候选知识列表
|
|
|
|
|
+ top_k: 返回数量
|
|
|
|
|
+
|
|
|
|
|
+ Returns:
|
|
|
|
|
+ 排序后的知识 ID 列表
|
|
|
"""
|
|
"""
|
|
|
- if not metadata_list:
|
|
|
|
|
|
|
+ if not candidates:
|
|
|
return []
|
|
return []
|
|
|
|
|
|
|
|
- routing_k = k * 2
|
|
|
|
|
|
|
+ # 构造 prompt
|
|
|
|
|
+ candidates_text = "\n".join([
|
|
|
|
|
+ f"[{i+1}] ID: {c['id']}\nTask: {c['task']}\nContent: {c['content'][:200]}..."
|
|
|
|
|
+ for i, c in enumerate(candidates)
|
|
|
|
|
+ ])
|
|
|
|
|
|
|
|
- routing_data = [
|
|
|
|
|
- {
|
|
|
|
|
- "id": m["id"],
|
|
|
|
|
- "types": m["types"],
|
|
|
|
|
- "task": m["task"][:100]
|
|
|
|
|
- } for m in metadata_list
|
|
|
|
|
- ]
|
|
|
|
|
|
|
+ prompt = f"""你是知识检索专家。根据用户查询,从候选知识中选出最相关的 {top_k} 条。
|
|
|
|
|
|
|
|
- prompt = f"""
|
|
|
|
|
-你是一个知识检索专家。根据用户的当前任务需求,从下列原子知识元数据中挑选出最相关的最多 {routing_k} 个知识 ID。
|
|
|
|
|
-任务需求:"{query_text}"
|
|
|
|
|
|
|
+用户查询:"{query}"
|
|
|
|
|
|
|
|
-可选知识列表:
|
|
|
|
|
-{json.dumps(routing_data, ensure_ascii=False, indent=1)}
|
|
|
|
|
|
|
+候选知识:
|
|
|
|
|
+{candidates_text}
|
|
|
|
|
|
|
|
-请直接输出 ID 列表,用逗号分隔(例如: knowledge-20260302-001, research-20260302-002)。若无相关项请输出 "None"。
|
|
|
|
|
-"""
|
|
|
|
|
|
|
+请输出最相关的 {top_k} 个知识 ID,按相关性从高到低排序,用逗号分隔。
|
|
|
|
|
+只输出 ID,不要其他内容。"""
|
|
|
|
|
|
|
|
try:
|
|
try:
|
|
|
- print(f"\n[Step 1: 知识语义路由] 任务: '{query_text}' | 候选总数: {len(metadata_list)} | 目标提取数: {routing_k}")
|
|
|
|
|
-
|
|
|
|
|
response = await openrouter_llm_call(
|
|
response = await openrouter_llm_call(
|
|
|
messages=[{"role": "user", "content": prompt}],
|
|
messages=[{"role": "user", "content": prompt}],
|
|
|
model="google/gemini-2.5-flash-lite"
|
|
model="google/gemini-2.5-flash-lite"
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
content = response.get("content", "").strip()
|
|
content = response.get("content", "").strip()
|
|
|
- selected_ids = [idx.strip() for idx in re.split(r'[,\s]+', content) if idx.strip().startswith(("knowledge-", "research-"))]
|
|
|
|
|
|
|
+ # 解析 ID 列表
|
|
|
|
|
+ selected_ids = [
|
|
|
|
|
+ idx.strip()
|
|
|
|
|
+ for idx in re.split(r'[,\s]+', content)
|
|
|
|
|
+ if idx.strip().startswith(("knowledge-", "research-"))
|
|
|
|
|
+ ]
|
|
|
|
|
+
|
|
|
|
|
+ return selected_ids[:top_k]
|
|
|
|
|
|
|
|
- print(f"[Step 1: 知识语义路由] LLM 初选 ID ({len(selected_ids)}个): {selected_ids}")
|
|
|
|
|
- return selected_ids
|
|
|
|
|
except Exception as e:
|
|
except Exception as e:
|
|
|
- print(f"LLM 知识路由失败: {e}")
|
|
|
|
|
|
|
+ print(f"[LLM Rerank] 失败: {e}")
|
|
|
return []
|
|
return []
|
|
|
|
|
|
|
|
|
|
|
|
|
-async def _search_knowledge_two_stage(
|
|
|
|
|
- query_text: str,
|
|
|
|
|
- top_k: int = 5,
|
|
|
|
|
- min_score: int = 3,
|
|
|
|
|
- types_filter: Optional[list[str]] = None,
|
|
|
|
|
- owner_filter: Optional[str] = None,
|
|
|
|
|
- conn: sqlite3.Connection = None
|
|
|
|
|
-) -> list[dict]:
|
|
|
|
|
- """
|
|
|
|
|
- 两阶段检索:语义路由 + 质量精排
|
|
|
|
|
- """
|
|
|
|
|
- if conn is None:
|
|
|
|
|
- conn = get_db()
|
|
|
|
|
- should_close = True
|
|
|
|
|
- else:
|
|
|
|
|
- should_close = False
|
|
|
|
|
-
|
|
|
|
|
- try:
|
|
|
|
|
- # 阶段 1: 解析所有知识
|
|
|
|
|
- query = "SELECT * FROM knowledge"
|
|
|
|
|
- rows = conn.execute(query).fetchall()
|
|
|
|
|
-
|
|
|
|
|
- if not rows:
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- content_map = {}
|
|
|
|
|
- metadata_list = []
|
|
|
|
|
-
|
|
|
|
|
- for row in rows:
|
|
|
|
|
- kid = row["id"]
|
|
|
|
|
- types = json.loads(row["types"])
|
|
|
|
|
-
|
|
|
|
|
- # 标签过滤
|
|
|
|
|
- if types_filter:
|
|
|
|
|
- if not any(t in types for t in types_filter):
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # owner 过滤
|
|
|
|
|
- if owner_filter and row["owner"] != owner_filter:
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- task = row["task"]
|
|
|
|
|
- content_text = row["content"]
|
|
|
|
|
- eval_data = json.loads(row["eval"])
|
|
|
|
|
- source = json.loads(row["source"])
|
|
|
|
|
-
|
|
|
|
|
- meta_item = {
|
|
|
|
|
- "id": kid,
|
|
|
|
|
- "types": types,
|
|
|
|
|
- "task": task,
|
|
|
|
|
- "score": eval_data.get("score", 3),
|
|
|
|
|
- "helpful": eval_data.get("helpful", 0),
|
|
|
|
|
- "harmful": eval_data.get("harmful", 0),
|
|
|
|
|
- }
|
|
|
|
|
- metadata_list.append(meta_item)
|
|
|
|
|
- content_map[kid] = {
|
|
|
|
|
- "task": task,
|
|
|
|
|
- "content": content_text,
|
|
|
|
|
- "types": types,
|
|
|
|
|
- "tags": json.loads(row["tags"]),
|
|
|
|
|
- "scopes": json.loads(row["scopes"]),
|
|
|
|
|
- "owner": row["owner"],
|
|
|
|
|
- "score": meta_item["score"],
|
|
|
|
|
- "helpful": meta_item["helpful"],
|
|
|
|
|
- "harmful": meta_item["harmful"],
|
|
|
|
|
- "message_id": row["message_id"],
|
|
|
|
|
- "source": source,
|
|
|
|
|
- "eval": eval_data,
|
|
|
|
|
- "created_at": row["created_at"],
|
|
|
|
|
- "updated_at": row["updated_at"]
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- if not metadata_list:
|
|
|
|
|
- return []
|
|
|
|
|
-
|
|
|
|
|
- # 阶段 2: 语义路由 (取 2*k)
|
|
|
|
|
- candidate_ids = await _route_knowledge_by_llm(query_text, metadata_list, k=top_k)
|
|
|
|
|
-
|
|
|
|
|
- # 阶段 3: 质量精排
|
|
|
|
|
- print(f"[Step 2: 知识质量精排] 正在根据评分和反馈进行打分...")
|
|
|
|
|
- scored_items = []
|
|
|
|
|
-
|
|
|
|
|
- for kid in candidate_ids:
|
|
|
|
|
- if kid in content_map:
|
|
|
|
|
- item = content_map[kid]
|
|
|
|
|
- score = item["score"]
|
|
|
|
|
- helpful = item["helpful"]
|
|
|
|
|
- harmful = item["harmful"]
|
|
|
|
|
-
|
|
|
|
|
- # 计算综合分:基础分 + helpful - harmful*2
|
|
|
|
|
- quality_score = score + helpful - (harmful * 2.0)
|
|
|
|
|
-
|
|
|
|
|
- # 过滤门槛
|
|
|
|
|
- if score < min_score or quality_score < 0:
|
|
|
|
|
- print(f" - 剔除低质量知识: {kid} (Score: {score}, Helpful: {helpful}, Harmful: {harmful})")
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- scored_items.append({
|
|
|
|
|
- "id": kid,
|
|
|
|
|
- "message_id": item["message_id"],
|
|
|
|
|
- "types": item["types"],
|
|
|
|
|
- "task": item["task"],
|
|
|
|
|
- "tags": item["tags"],
|
|
|
|
|
- "scopes": item["scopes"],
|
|
|
|
|
- "owner": item["owner"],
|
|
|
|
|
- "content": item["content"],
|
|
|
|
|
- "source": item["source"],
|
|
|
|
|
- "eval": item["eval"],
|
|
|
|
|
- "quality_score": quality_score,
|
|
|
|
|
- "created_at": item["created_at"],
|
|
|
|
|
- "updated_at": item["updated_at"]
|
|
|
|
|
- })
|
|
|
|
|
-
|
|
|
|
|
- # 按照质量分排序
|
|
|
|
|
- final_sorted = sorted(scored_items, key=lambda x: x["quality_score"], reverse=True)
|
|
|
|
|
-
|
|
|
|
|
- # 截取最终的 top_k
|
|
|
|
|
- result = final_sorted[:top_k]
|
|
|
|
|
-
|
|
|
|
|
- print(f"[Step 2: 知识质量精排] 最终选定知识: {[it['id'] for it in result]}")
|
|
|
|
|
- print(f"[Knowledge System] 检索结束。\n")
|
|
|
|
|
- return result
|
|
|
|
|
-
|
|
|
|
|
- finally:
|
|
|
|
|
- if should_close:
|
|
|
|
|
- conn.close()
|
|
|
|
|
-
|
|
|
|
|
-
|
|
|
|
|
@app.get("/api/knowledge/search")
|
|
@app.get("/api/knowledge/search")
|
|
|
async def search_knowledge_api(
|
|
async def search_knowledge_api(
|
|
|
q: str = Query(..., description="查询文本"),
|
|
q: str = Query(..., description="查询文本"),
|
|
@@ -681,37 +556,64 @@ async def search_knowledge_api(
|
|
|
types: Optional[str] = None,
|
|
types: Optional[str] = None,
|
|
|
owner: Optional[str] = None
|
|
owner: Optional[str] = None
|
|
|
):
|
|
):
|
|
|
- """检索知识(两阶段:语义路由 + 质量精排)"""
|
|
|
|
|
- conn = get_db()
|
|
|
|
|
|
|
+ """检索知识(向量召回 + LLM 精排)"""
|
|
|
try:
|
|
try:
|
|
|
- types_filter = types.split(",") if types else None
|
|
|
|
|
-
|
|
|
|
|
- results = await _search_knowledge_two_stage(
|
|
|
|
|
- query_text=q,
|
|
|
|
|
- top_k=top_k,
|
|
|
|
|
- min_score=min_score,
|
|
|
|
|
- types_filter=types_filter,
|
|
|
|
|
- owner_filter=owner,
|
|
|
|
|
- conn=conn
|
|
|
|
|
|
|
+ # 1. 生成查询向量
|
|
|
|
|
+ query_embedding = await get_embedding(q)
|
|
|
|
|
+
|
|
|
|
|
+ # 2. 构建过滤表达式
|
|
|
|
|
+ filters = []
|
|
|
|
|
+ if types:
|
|
|
|
|
+ type_list = [t.strip() for t in types.split(',') if t.strip()]
|
|
|
|
|
+ for t in type_list:
|
|
|
|
|
+ filters.append(f'JSON_CONTAINS(types, "{t}")')
|
|
|
|
|
+ if owner:
|
|
|
|
|
+ filters.append(f'owner == "{owner}"')
|
|
|
|
|
+
|
|
|
|
|
+ # 添加 min_score 过滤
|
|
|
|
|
+ filters.append(f'JSON_EXTRACT(eval, "$.score") >= {min_score}')
|
|
|
|
|
+
|
|
|
|
|
+ filter_expr = ' and '.join(filters) if filters else None
|
|
|
|
|
+
|
|
|
|
|
+ # 3. 向量召回(3*k 个候选)
|
|
|
|
|
+ recall_limit = top_k * 3
|
|
|
|
|
+ candidates = milvus_store.search(
|
|
|
|
|
+ query_embedding=query_embedding,
|
|
|
|
|
+ filters=filter_expr,
|
|
|
|
|
+ limit=recall_limit
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
- return {"results": results, "count": len(results)}
|
|
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+ if not candidates:
|
|
|
|
|
+ return {"results": [], "count": 0, "reranked": False}
|
|
|
|
|
+
|
|
|
|
|
+ # 4. LLM 精排
|
|
|
|
|
+ reranked_ids = await _llm_rerank(q, candidates, top_k)
|
|
|
|
|
+
|
|
|
|
|
+ if reranked_ids:
|
|
|
|
|
+ # 按 LLM 排序返回
|
|
|
|
|
+ id_to_candidate = {c["id"]: c for c in candidates}
|
|
|
|
|
+ results = [id_to_candidate[id] for id in reranked_ids if id in id_to_candidate]
|
|
|
|
|
+ return {"results": results, "count": len(results), "reranked": True}
|
|
|
|
|
+ else:
|
|
|
|
|
+ # Fallback:直接返回向量召回的 top k
|
|
|
|
|
+ print(f"[Knowledge Search] LLM 精排失败,fallback 到向量 top-{top_k}")
|
|
|
|
|
+ return {"results": candidates[:top_k], "count": len(candidates[:top_k]), "reranked": False}
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Knowledge Search] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/knowledge", status_code=201)
|
|
@app.post("/api/knowledge", status_code=201)
|
|
|
-def save_knowledge(knowledge: KnowledgeIn):
|
|
|
|
|
|
|
+async def save_knowledge(knowledge: KnowledgeIn):
|
|
|
"""保存新知识"""
|
|
"""保存新知识"""
|
|
|
- import uuid
|
|
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
# 生成 ID
|
|
# 生成 ID
|
|
|
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
|
|
timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
|
|
|
random_suffix = uuid.uuid4().hex[:4]
|
|
random_suffix = uuid.uuid4().hex[:4]
|
|
|
knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
|
|
knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
|
|
|
|
|
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
|
+ now = int(time.time())
|
|
|
|
|
|
|
|
# 设置默认值
|
|
# 设置默认值
|
|
|
owner = knowledge.owner or f"agent:{knowledge.source.get('agent_id', 'unknown')}"
|
|
owner = knowledge.owner or f"agent:{knowledge.source.get('agent_id', 'unknown')}"
|
|
@@ -723,7 +625,7 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
"urls": knowledge.source.get("urls", []),
|
|
"urls": knowledge.source.get("urls", []),
|
|
|
"agent_id": knowledge.source.get("agent_id", "unknown"),
|
|
"agent_id": knowledge.source.get("agent_id", "unknown"),
|
|
|
"submitted_by": knowledge.source.get("submitted_by", ""),
|
|
"submitted_by": knowledge.source.get("submitted_by", ""),
|
|
|
- "timestamp": now,
|
|
|
|
|
|
|
+ "timestamp": datetime.now(timezone.utc).isoformat(),
|
|
|
"message_id": knowledge.message_id
|
|
"message_id": knowledge.message_id
|
|
|
}
|
|
}
|
|
|
|
|
|
|
@@ -737,31 +639,33 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
"harmful_history": []
|
|
"harmful_history": []
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- conn.execute(
|
|
|
|
|
- """INSERT INTO knowledge
|
|
|
|
|
- (id, message_id, types, task, tags, scopes, owner, content,
|
|
|
|
|
- resource_ids, source, eval, created_at, updated_at)
|
|
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
- (
|
|
|
|
|
- knowledge_id,
|
|
|
|
|
- knowledge.message_id,
|
|
|
|
|
- json.dumps(knowledge.types),
|
|
|
|
|
- knowledge.task,
|
|
|
|
|
- json.dumps(knowledge.tags),
|
|
|
|
|
- json.dumps(knowledge.scopes),
|
|
|
|
|
- owner,
|
|
|
|
|
- knowledge.content,
|
|
|
|
|
- json.dumps(knowledge.resource_ids),
|
|
|
|
|
- json.dumps(source),
|
|
|
|
|
- json.dumps(eval_data),
|
|
|
|
|
- now,
|
|
|
|
|
- now,
|
|
|
|
|
- ),
|
|
|
|
|
- )
|
|
|
|
|
- conn.commit()
|
|
|
|
|
|
|
+ # 生成向量
|
|
|
|
|
+ text = f"{knowledge.task}\n{knowledge.content}"
|
|
|
|
|
+ embedding = await get_embedding(text)
|
|
|
|
|
+
|
|
|
|
|
+ # 插入 Milvus
|
|
|
|
|
+ milvus_store.insert({
|
|
|
|
|
+ "id": knowledge_id,
|
|
|
|
|
+ "embedding": embedding,
|
|
|
|
|
+ "message_id": knowledge.message_id,
|
|
|
|
|
+ "task": knowledge.task,
|
|
|
|
|
+ "content": knowledge.content,
|
|
|
|
|
+ "types": knowledge.types,
|
|
|
|
|
+ "tags": knowledge.tags,
|
|
|
|
|
+ "scopes": knowledge.scopes,
|
|
|
|
|
+ "owner": owner,
|
|
|
|
|
+ "resource_ids": knowledge.resource_ids,
|
|
|
|
|
+ "source": source,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
|
|
+ "created_at": now,
|
|
|
|
|
+ "updated_at": now,
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Save Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/knowledge")
|
|
@app.get("/api/knowledge")
|
|
@@ -773,112 +677,78 @@ def list_knowledge(
|
|
|
tags: Optional[str] = None
|
|
tags: Optional[str] = None
|
|
|
):
|
|
):
|
|
|
"""列出知识(支持后端筛选)"""
|
|
"""列出知识(支持后端筛选)"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- query = "SELECT * FROM knowledge"
|
|
|
|
|
- params = []
|
|
|
|
|
- conditions = []
|
|
|
|
|
|
|
+ # 构建过滤表达式
|
|
|
|
|
+ filters = []
|
|
|
|
|
|
|
|
# types 支持多个,用 AND 连接(交集:必须同时包含所有选中的type)
|
|
# types 支持多个,用 AND 连接(交集:必须同时包含所有选中的type)
|
|
|
if types:
|
|
if types:
|
|
|
type_list = [t.strip() for t in types.split(',') if t.strip()]
|
|
type_list = [t.strip() for t in types.split(',') if t.strip()]
|
|
|
- if type_list:
|
|
|
|
|
- for t in type_list:
|
|
|
|
|
- conditions.append("types LIKE ?")
|
|
|
|
|
- params.append(f"%{t}%")
|
|
|
|
|
|
|
+ for t in type_list:
|
|
|
|
|
+ filters.append(f'JSON_CONTAINS(types, "{t}")')
|
|
|
|
|
|
|
|
if scopes:
|
|
if scopes:
|
|
|
- conditions.append("scopes LIKE ?")
|
|
|
|
|
- params.append(f"%{scopes}%")
|
|
|
|
|
|
|
+ filters.append(f'JSON_CONTAINS(scopes, "{scopes}")')
|
|
|
|
|
|
|
|
if owner:
|
|
if owner:
|
|
|
- conditions.append("owner LIKE ?")
|
|
|
|
|
- params.append(f"%{owner}%")
|
|
|
|
|
|
|
+ filters.append(f'owner like "%{owner}%"')
|
|
|
|
|
|
|
|
# tags 支持多个,用 AND 连接(交集:必须同时包含所有选中的tag)
|
|
# tags 支持多个,用 AND 连接(交集:必须同时包含所有选中的tag)
|
|
|
if tags:
|
|
if tags:
|
|
|
tag_list = [t.strip() for t in tags.split(',') if t.strip()]
|
|
tag_list = [t.strip() for t in tags.split(',') if t.strip()]
|
|
|
- if tag_list:
|
|
|
|
|
- for t in tag_list:
|
|
|
|
|
- conditions.append("tags LIKE ?")
|
|
|
|
|
- params.append(f"%{t}%")
|
|
|
|
|
-
|
|
|
|
|
- if conditions:
|
|
|
|
|
- query += " WHERE " + " AND ".join(conditions)
|
|
|
|
|
|
|
+ for t in tag_list:
|
|
|
|
|
+ filters.append(f'JSON_CONTAINS_ANY(tags, ["{t}"])')
|
|
|
|
|
|
|
|
- query += " ORDER BY created_at DESC LIMIT ?"
|
|
|
|
|
- params.append(limit)
|
|
|
|
|
|
|
+ # 如果没有过滤条件,查询所有
|
|
|
|
|
+ filter_expr = ' and '.join(filters) if filters else 'id != ""'
|
|
|
|
|
|
|
|
- rows = conn.execute(query, params).fetchall()
|
|
|
|
|
-
|
|
|
|
|
- results = []
|
|
|
|
|
- for row in rows:
|
|
|
|
|
- results.append({
|
|
|
|
|
- "id": row["id"],
|
|
|
|
|
- "message_id": row["message_id"],
|
|
|
|
|
- "types": json.loads(row["types"]),
|
|
|
|
|
- "task": row["task"],
|
|
|
|
|
- "tags": json.loads(row["tags"]),
|
|
|
|
|
- "scopes": json.loads(row["scopes"]),
|
|
|
|
|
- "owner": row["owner"],
|
|
|
|
|
- "content": row["content"],
|
|
|
|
|
- "source": json.loads(row["source"]),
|
|
|
|
|
- "eval": json.loads(row["eval"]),
|
|
|
|
|
- "created_at": row["created_at"],
|
|
|
|
|
- "updated_at": row["updated_at"]
|
|
|
|
|
- })
|
|
|
|
|
|
|
+ # 查询 Milvus
|
|
|
|
|
+ results = milvus_store.query(filter_expr, limit=limit)
|
|
|
|
|
|
|
|
return {"results": results, "count": len(results)}
|
|
return {"results": results, "count": len(results)}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[List Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/knowledge/meta/tags")
|
|
@app.get("/api/knowledge/meta/tags")
|
|
|
def get_all_tags():
|
|
def get_all_tags():
|
|
|
"""获取所有已有的 tags"""
|
|
"""获取所有已有的 tags"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- rows = conn.execute("SELECT tags FROM knowledge").fetchall()
|
|
|
|
|
|
|
+ # 查询所有知识
|
|
|
|
|
+ results = milvus_store.query('id != ""', limit=10000)
|
|
|
|
|
+
|
|
|
all_tags = set()
|
|
all_tags = set()
|
|
|
- for row in rows:
|
|
|
|
|
- tags_dict = json.loads(row["tags"])
|
|
|
|
|
- for key in tags_dict.keys():
|
|
|
|
|
- all_tags.add(key)
|
|
|
|
|
|
|
+ for item in results:
|
|
|
|
|
+ tags_dict = item.get("tags", {})
|
|
|
|
|
+ if isinstance(tags_dict, dict):
|
|
|
|
|
+ for key in tags_dict.keys():
|
|
|
|
|
+ all_tags.add(key)
|
|
|
|
|
+
|
|
|
return {"tags": sorted(list(all_tags))}
|
|
return {"tags": sorted(list(all_tags))}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Get Tags] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.get("/api/knowledge/{knowledge_id}")
|
|
@app.get("/api/knowledge/{knowledge_id}")
|
|
|
def get_knowledge(knowledge_id: str):
|
|
def get_knowledge(knowledge_id: str):
|
|
|
"""获取单条知识"""
|
|
"""获取单条知识"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- row = conn.execute(
|
|
|
|
|
- "SELECT * FROM knowledge WHERE id = ?",
|
|
|
|
|
- (knowledge_id,)
|
|
|
|
|
- ).fetchone()
|
|
|
|
|
|
|
+ result = milvus_store.get_by_id(knowledge_id)
|
|
|
|
|
|
|
|
- if not row:
|
|
|
|
|
|
|
+ if not result:
|
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
|
|
|
|
|
|
- return {
|
|
|
|
|
- "id": row["id"],
|
|
|
|
|
- "message_id": row["message_id"],
|
|
|
|
|
- "types": json.loads(row["types"]),
|
|
|
|
|
- "task": row["task"],
|
|
|
|
|
- "tags": json.loads(row["tags"]),
|
|
|
|
|
- "scopes": json.loads(row["scopes"]),
|
|
|
|
|
- "owner": row["owner"],
|
|
|
|
|
- "content": row["content"],
|
|
|
|
|
- "resource_ids": json.loads(row["resource_ids"]),
|
|
|
|
|
- "source": json.loads(row["source"]),
|
|
|
|
|
- "eval": json.loads(row["eval"]),
|
|
|
|
|
- "created_at": row["created_at"],
|
|
|
|
|
- "updated_at": row["updated_at"]
|
|
|
|
|
- }
|
|
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+ return result
|
|
|
|
|
+
|
|
|
|
|
+ except HTTPException:
|
|
|
|
|
+ raise
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Get Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
|
|
async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
|
|
@@ -914,14 +784,13 @@ async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
|
|
|
@app.put("/api/knowledge/{knowledge_id}")
|
|
@app.put("/api/knowledge/{knowledge_id}")
|
|
|
async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
|
|
async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
|
|
|
"""更新知识评估,支持知识进化"""
|
|
"""更新知识评估,支持知识进化"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- row = conn.execute("SELECT * FROM knowledge WHERE id = ?", (knowledge_id,)).fetchone()
|
|
|
|
|
- if not row:
|
|
|
|
|
|
|
+ # 获取现有知识
|
|
|
|
|
+ existing = milvus_store.get_by_id(knowledge_id)
|
|
|
|
|
+ if not existing:
|
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
|
|
|
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
- eval_data = json.loads(row["eval"])
|
|
|
|
|
|
|
+ eval_data = existing.get("eval", {})
|
|
|
|
|
|
|
|
# 更新评分
|
|
# 更新评分
|
|
|
if update.update_score is not None:
|
|
if update.update_score is not None:
|
|
@@ -942,69 +811,91 @@ async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
|
|
|
eval_data["harmful_history"].append(update.add_harmful_case)
|
|
eval_data["harmful_history"].append(update.add_harmful_case)
|
|
|
|
|
|
|
|
# 知识进化
|
|
# 知识进化
|
|
|
- content = row["content"]
|
|
|
|
|
|
|
+ content = existing["content"]
|
|
|
|
|
+ need_reembed = False
|
|
|
|
|
+
|
|
|
if update.evolve_feedback:
|
|
if update.evolve_feedback:
|
|
|
content = await _evolve_knowledge_with_llm(content, update.evolve_feedback)
|
|
content = await _evolve_knowledge_with_llm(content, update.evolve_feedback)
|
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
|
|
+ need_reembed = True
|
|
|
|
|
|
|
|
- # 更新数据库
|
|
|
|
|
- conn.execute(
|
|
|
|
|
- "UPDATE knowledge SET content = ?, eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (content, json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
|
|
- )
|
|
|
|
|
- conn.commit()
|
|
|
|
|
|
|
+ # 准备更新数据
|
|
|
|
|
+ updates = {
|
|
|
|
|
+ "content": content,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 如果内容变化,重新生成向量
|
|
|
|
|
+ if need_reembed:
|
|
|
|
|
+ text = f"{existing['task']}\n{content}"
|
|
|
|
|
+ embedding = await get_embedding(text)
|
|
|
|
|
+ updates["embedding"] = embedding
|
|
|
|
|
+
|
|
|
|
|
+ # 更新 Milvus
|
|
|
|
|
+ milvus_store.update(knowledge_id, updates)
|
|
|
|
|
|
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except HTTPException:
|
|
|
|
|
+ raise
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Update Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.patch("/api/knowledge/{knowledge_id}")
|
|
@app.patch("/api/knowledge/{knowledge_id}")
|
|
|
-def patch_knowledge(knowledge_id: str, patch: KnowledgePatchIn):
|
|
|
|
|
|
|
+async def patch_knowledge(knowledge_id: str, patch: KnowledgePatchIn):
|
|
|
"""直接编辑知识字段"""
|
|
"""直接编辑知识字段"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- row = conn.execute("SELECT * FROM knowledge WHERE id = ?", (knowledge_id,)).fetchone()
|
|
|
|
|
- if not row:
|
|
|
|
|
|
|
+ # 获取现有知识
|
|
|
|
|
+ existing = milvus_store.get_by_id(knowledge_id)
|
|
|
|
|
+ if not existing:
|
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
|
|
|
|
|
|
- updates = []
|
|
|
|
|
- params = []
|
|
|
|
|
|
|
+ updates = {}
|
|
|
|
|
+ need_reembed = False
|
|
|
|
|
|
|
|
if patch.task is not None:
|
|
if patch.task is not None:
|
|
|
- updates.append("task = ?")
|
|
|
|
|
- params.append(patch.task)
|
|
|
|
|
|
|
+ updates["task"] = patch.task
|
|
|
|
|
+ need_reembed = True
|
|
|
|
|
+
|
|
|
if patch.content is not None:
|
|
if patch.content is not None:
|
|
|
- updates.append("content = ?")
|
|
|
|
|
- params.append(patch.content)
|
|
|
|
|
|
|
+ updates["content"] = patch.content
|
|
|
|
|
+ need_reembed = True
|
|
|
|
|
+
|
|
|
if patch.types is not None:
|
|
if patch.types is not None:
|
|
|
- updates.append("types = ?")
|
|
|
|
|
- params.append(json.dumps(patch.types, ensure_ascii=False))
|
|
|
|
|
|
|
+ updates["types"] = patch.types
|
|
|
|
|
+
|
|
|
if patch.tags is not None:
|
|
if patch.tags is not None:
|
|
|
- updates.append("tags = ?")
|
|
|
|
|
- params.append(json.dumps(patch.tags, ensure_ascii=False))
|
|
|
|
|
|
|
+ updates["tags"] = patch.tags
|
|
|
|
|
+
|
|
|
if patch.scopes is not None:
|
|
if patch.scopes is not None:
|
|
|
- updates.append("scopes = ?")
|
|
|
|
|
- params.append(json.dumps(patch.scopes, ensure_ascii=False))
|
|
|
|
|
|
|
+ updates["scopes"] = patch.scopes
|
|
|
|
|
+
|
|
|
if patch.owner is not None:
|
|
if patch.owner is not None:
|
|
|
- updates.append("owner = ?")
|
|
|
|
|
- params.append(patch.owner)
|
|
|
|
|
|
|
+ updates["owner"] = patch.owner
|
|
|
|
|
|
|
|
if not updates:
|
|
if not updates:
|
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
|
|
|
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
- updates.append("updated_at = ?")
|
|
|
|
|
- params.append(now)
|
|
|
|
|
- params.append(knowledge_id)
|
|
|
|
|
|
|
+ # 如果 task 或 content 变化,重新生成向量
|
|
|
|
|
+ if need_reembed:
|
|
|
|
|
+ task = updates.get("task", existing["task"])
|
|
|
|
|
+ content = updates.get("content", existing["content"])
|
|
|
|
|
+ text = f"{task}\n{content}"
|
|
|
|
|
+ embedding = await get_embedding(text)
|
|
|
|
|
+ updates["embedding"] = embedding
|
|
|
|
|
|
|
|
- query = f"UPDATE knowledge SET {', '.join(updates)} WHERE id = ?"
|
|
|
|
|
- conn.execute(query, params)
|
|
|
|
|
- conn.commit()
|
|
|
|
|
|
|
+ # 更新 Milvus
|
|
|
|
|
+ milvus_store.update(knowledge_id, updates)
|
|
|
|
|
|
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except HTTPException:
|
|
|
|
|
+ raise
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Patch Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/knowledge/batch_update")
|
|
@app.post("/api/knowledge/batch_update")
|
|
@@ -1013,7 +904,6 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
if not batch.feedback_list:
|
|
if not batch.feedback_list:
|
|
|
return {"status": "ok", "updated": 0}
|
|
return {"status": "ok", "updated": 0}
|
|
|
|
|
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
# 先处理无需进化的,收集需要进化的
|
|
# 先处理无需进化的,收集需要进化的
|
|
|
evolution_tasks = [] # [(knowledge_id, old_content, feedback, eval_data)]
|
|
evolution_tasks = [] # [(knowledge_id, old_content, feedback, eval_data)]
|
|
@@ -1027,67 +917,72 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
if not knowledge_id:
|
|
if not knowledge_id:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- row = conn.execute("SELECT * FROM knowledge WHERE id = ?", (knowledge_id,)).fetchone()
|
|
|
|
|
- if not row:
|
|
|
|
|
|
|
+ existing = milvus_store.get_by_id(knowledge_id)
|
|
|
|
|
+ if not existing:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- eval_data = json.loads(row["eval"])
|
|
|
|
|
|
|
+ eval_data = existing.get("eval", {})
|
|
|
|
|
|
|
|
if is_effective and feedback:
|
|
if is_effective and feedback:
|
|
|
- evolution_tasks.append((knowledge_id, row["content"], feedback, eval_data))
|
|
|
|
|
|
|
+ evolution_tasks.append((knowledge_id, existing["content"], feedback, eval_data, existing["task"]))
|
|
|
else:
|
|
else:
|
|
|
simple_updates.append((knowledge_id, is_effective, eval_data))
|
|
simple_updates.append((knowledge_id, is_effective, eval_data))
|
|
|
|
|
|
|
|
# 执行简单更新
|
|
# 执行简单更新
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
for knowledge_id, is_effective, eval_data in simple_updates:
|
|
for knowledge_id, is_effective, eval_data in simple_updates:
|
|
|
if is_effective:
|
|
if is_effective:
|
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
else:
|
|
else:
|
|
|
eval_data["harmful"] = eval_data.get("harmful", 0) + 1
|
|
eval_data["harmful"] = eval_data.get("harmful", 0) + 1
|
|
|
|
|
|
|
|
- conn.execute(
|
|
|
|
|
- "UPDATE knowledge SET eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ milvus_store.update(knowledge_id, {"eval": eval_data})
|
|
|
|
|
|
|
|
# 并发执行知识进化
|
|
# 并发执行知识进化
|
|
|
if evolution_tasks:
|
|
if evolution_tasks:
|
|
|
print(f"🧬 并发处理 {len(evolution_tasks)} 条知识进化...")
|
|
print(f"🧬 并发处理 {len(evolution_tasks)} 条知识进化...")
|
|
|
evolved_results = await asyncio.gather(
|
|
evolved_results = await asyncio.gather(
|
|
|
- *[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _ in evolution_tasks]
|
|
|
|
|
|
|
+ *[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _, _ in evolution_tasks]
|
|
|
)
|
|
)
|
|
|
- for (knowledge_id, _, _, eval_data), evolved_content in zip(evolution_tasks, evolved_results):
|
|
|
|
|
|
|
+
|
|
|
|
|
+ for (knowledge_id, _, _, eval_data, task), evolved_content in zip(evolution_tasks, evolved_results):
|
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
- conn.execute(
|
|
|
|
|
- "UPDATE knowledge SET content = ?, eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (evolved_content, json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
|
|
- )
|
|
|
|
|
|
|
|
|
|
- conn.commit()
|
|
|
|
|
|
|
+ # 重新生成向量
|
|
|
|
|
+ text = f"{task}\n{evolved_content}"
|
|
|
|
|
+ embedding = await get_embedding(text)
|
|
|
|
|
+
|
|
|
|
|
+ milvus_store.update(knowledge_id, {
|
|
|
|
|
+ "content": evolved_content,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
|
|
+ "embedding": embedding
|
|
|
|
|
+ })
|
|
|
|
|
+
|
|
|
return {"status": "ok", "updated": len(simple_updates) + len(evolution_tasks)}
|
|
return {"status": "ok", "updated": len(simple_updates) + len(evolution_tasks)}
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Batch Update] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/knowledge/slim")
|
|
@app.post("/api/knowledge/slim")
|
|
|
async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
|
|
async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
|
|
|
"""知识库瘦身:合并语义相似知识"""
|
|
"""知识库瘦身:合并语义相似知识"""
|
|
|
- conn = get_db()
|
|
|
|
|
try:
|
|
try:
|
|
|
- rows = conn.execute("SELECT * FROM knowledge").fetchall()
|
|
|
|
|
- if len(rows) < 2:
|
|
|
|
|
- return {"status": "ok", "message": f"知识库仅有 {len(rows)} 条,无需瘦身"}
|
|
|
|
|
|
|
+ # 获取所有知识
|
|
|
|
|
+ all_knowledge = milvus_store.query('id != ""', limit=10000)
|
|
|
|
|
+
|
|
|
|
|
+ if len(all_knowledge) < 2:
|
|
|
|
|
+ return {"status": "ok", "message": f"知识库仅有 {len(all_knowledge)} 条,无需瘦身"}
|
|
|
|
|
|
|
|
# 构造发给大模型的内容
|
|
# 构造发给大模型的内容
|
|
|
entries_text = ""
|
|
entries_text = ""
|
|
|
- for row in rows:
|
|
|
|
|
- eval_data = json.loads(row["eval"])
|
|
|
|
|
- types = json.loads(row["types"])
|
|
|
|
|
- entries_text += f"[ID: {row['id']}] [Types: {','.join(types)}] "
|
|
|
|
|
|
|
+ for item in all_knowledge:
|
|
|
|
|
+ eval_data = item.get("eval", {})
|
|
|
|
|
+ types = item.get("types", [])
|
|
|
|
|
+ entries_text += f"[ID: {item['id']}] [Types: {','.join(types)}] "
|
|
|
entries_text += f"[Helpful: {eval_data.get('helpful', 0)}, Harmful: {eval_data.get('harmful', 0)}] [Score: {eval_data.get('score', 3)}]\n"
|
|
entries_text += f"[Helpful: {eval_data.get('helpful', 0)}, Harmful: {eval_data.get('harmful', 0)}] [Score: {eval_data.get('score', 3)}]\n"
|
|
|
- entries_text += f"Task: {row['task']}\n"
|
|
|
|
|
- entries_text += f"Content: {row['content'][:200]}...\n\n"
|
|
|
|
|
|
|
+ entries_text += f"Task: {item['task']}\n"
|
|
|
|
|
+ entries_text += f"Content: {item['content'][:200]}...\n\n"
|
|
|
|
|
|
|
|
prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:
|
|
prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:
|
|
|
|
|
|
|
@@ -1115,7 +1010,7 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
|
|
|
|
|
禁止输出任何开场白或解释。"""
|
|
禁止输出任何开场白或解释。"""
|
|
|
|
|
|
|
|
- print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(rows)} 条知识...")
|
|
|
|
|
|
|
+ print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(all_knowledge)} 条知识...")
|
|
|
response = await openrouter_llm_call(
|
|
response = await openrouter_llm_call(
|
|
|
messages=[{"role": "user", "content": prompt}],
|
|
messages=[{"role": "user", "content": prompt}],
|
|
|
model=model
|
|
model=model
|
|
@@ -1189,10 +1084,20 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
if not new_entries:
|
|
if not new_entries:
|
|
|
raise HTTPException(status_code=500, detail="解析大模型输出失败")
|
|
raise HTTPException(status_code=500, detail="解析大模型输出失败")
|
|
|
|
|
|
|
|
- # 原子化写回
|
|
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
- conn.execute("DELETE FROM knowledge")
|
|
|
|
|
- for e in new_entries:
|
|
|
|
|
|
|
+ # 生成向量并重建知识库
|
|
|
|
|
+ print(f"[知识瘦身] 正在为 {len(new_entries)} 条知识生成向量...")
|
|
|
|
|
+
|
|
|
|
|
+ # 批量生成向量
|
|
|
|
|
+ texts = [f"{e['task']}\n{e['content']}" for e in new_entries]
|
|
|
|
|
+ embeddings = await get_embeddings_batch(texts)
|
|
|
|
|
+
|
|
|
|
|
+ # 清空并重建
|
|
|
|
|
+ now = int(time.time())
|
|
|
|
|
+ milvus_store.drop_collection()
|
|
|
|
|
+ milvus_store._init_collection()
|
|
|
|
|
+
|
|
|
|
|
+ knowledge_list = []
|
|
|
|
|
+ for e, embedding in zip(new_entries, embeddings):
|
|
|
eval_data = {
|
|
eval_data = {
|
|
|
"score": e["score"],
|
|
"score": e["score"],
|
|
|
"helpful": e["helpful"],
|
|
"helpful": e["helpful"],
|
|
@@ -1207,37 +1112,39 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
"urls": [],
|
|
"urls": [],
|
|
|
"agent_id": "slim",
|
|
"agent_id": "slim",
|
|
|
"submitted_by": "system",
|
|
"submitted_by": "system",
|
|
|
- "timestamp": now
|
|
|
|
|
|
|
+ "timestamp": datetime.now(timezone.utc).isoformat()
|
|
|
}
|
|
}
|
|
|
- conn.execute(
|
|
|
|
|
- """INSERT INTO knowledge
|
|
|
|
|
- (id, message_id, types, task, tags, scopes, owner, content, source, eval, created_at, updated_at)
|
|
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
- (
|
|
|
|
|
- e["id"],
|
|
|
|
|
- "",
|
|
|
|
|
- json.dumps(e["types"]),
|
|
|
|
|
- e["task"],
|
|
|
|
|
- json.dumps({}),
|
|
|
|
|
- json.dumps(["org:cybertogether"]),
|
|
|
|
|
- "agent:slim",
|
|
|
|
|
- e["content"],
|
|
|
|
|
- json.dumps(source, ensure_ascii=False),
|
|
|
|
|
- json.dumps(eval_data, ensure_ascii=False),
|
|
|
|
|
- now,
|
|
|
|
|
- now
|
|
|
|
|
- )
|
|
|
|
|
- )
|
|
|
|
|
- conn.commit()
|
|
|
|
|
|
|
+ knowledge_list.append({
|
|
|
|
|
+ "id": e["id"],
|
|
|
|
|
+ "embedding": embedding,
|
|
|
|
|
+ "message_id": "",
|
|
|
|
|
+ "task": e["task"],
|
|
|
|
|
+ "content": e["content"],
|
|
|
|
|
+ "types": e["types"],
|
|
|
|
|
+ "tags": {},
|
|
|
|
|
+ "scopes": ["org:cybertogether"],
|
|
|
|
|
+ "owner": "agent:slim",
|
|
|
|
|
+ "resource_ids": [],
|
|
|
|
|
+ "source": source,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
|
|
+ "created_at": now,
|
|
|
|
|
+ "updated_at": now
|
|
|
|
|
+ })
|
|
|
|
|
|
|
|
- result_msg = f"瘦身完成:{len(rows)} → {len(new_entries)} 条知识"
|
|
|
|
|
|
|
+ milvus_store.insert_batch(knowledge_list)
|
|
|
|
|
+
|
|
|
|
|
+ result_msg = f"瘦身完成:{len(all_knowledge)} → {len(new_entries)} 条知识"
|
|
|
if report_line:
|
|
if report_line:
|
|
|
result_msg += f"\n{report_line}"
|
|
result_msg += f"\n{report_line}"
|
|
|
print(f"[知识瘦身] {result_msg}")
|
|
print(f"[知识瘦身] {result_msg}")
|
|
|
|
|
|
|
|
- return {"status": "ok", "before": len(rows), "after": len(new_entries), "report": report_line}
|
|
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+ return {"status": "ok", "before": len(all_knowledge), "after": len(new_entries), "report": report_line}
|
|
|
|
|
+
|
|
|
|
|
+ except HTTPException:
|
|
|
|
|
+ raise
|
|
|
|
|
+ except Exception as e:
|
|
|
|
|
+ print(f"[Slim Knowledge] 错误: {e}")
|
|
|
|
|
+ raise HTTPException(status_code=500, detail=str(e))
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/extract")
|
|
@app.post("/api/extract")
|
|
@@ -1321,81 +1228,81 @@ async def extract_knowledge_from_messages(extract_req: MessageExtractIn):
|
|
|
if not isinstance(extracted_knowledge, list):
|
|
if not isinstance(extracted_knowledge, list):
|
|
|
raise ValueError("LLM output is not a list")
|
|
raise ValueError("LLM output is not a list")
|
|
|
|
|
|
|
|
|
|
+ if not extracted_knowledge:
|
|
|
|
|
+ return {"status": "ok", "extracted_count": 0, "knowledge_ids": []}
|
|
|
|
|
+
|
|
|
|
|
+ # 批量生成向量
|
|
|
|
|
+ texts = [f"{item.get('task', '')}\n{item.get('content', '')}" for item in extracted_knowledge]
|
|
|
|
|
+ embeddings = await get_embeddings_batch(texts)
|
|
|
|
|
+
|
|
|
# 保存提取的知识
|
|
# 保存提取的知识
|
|
|
- conn = get_db()
|
|
|
|
|
knowledge_ids = []
|
|
knowledge_ids = []
|
|
|
- now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
|
+ now = int(time.time())
|
|
|
|
|
+ knowledge_list = []
|
|
|
|
|
|
|
|
- try:
|
|
|
|
|
- for item in extracted_knowledge:
|
|
|
|
|
- task = item.get("task", "")
|
|
|
|
|
- knowledge_content = item.get("content", "")
|
|
|
|
|
- types = item.get("types", ["strategy"])
|
|
|
|
|
- score = item.get("score", 3)
|
|
|
|
|
-
|
|
|
|
|
- if not task or not knowledge_content:
|
|
|
|
|
- continue
|
|
|
|
|
-
|
|
|
|
|
- # 生成 ID
|
|
|
|
|
- import uuid
|
|
|
|
|
- timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
|
|
|
|
|
- random_suffix = uuid.uuid4().hex[:4]
|
|
|
|
|
- knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
|
|
|
|
|
-
|
|
|
|
|
- # 准备数据
|
|
|
|
|
- source = {
|
|
|
|
|
- "name": "message_extraction",
|
|
|
|
|
- "category": "exp",
|
|
|
|
|
- "urls": [],
|
|
|
|
|
- "agent_id": extract_req.agent_id,
|
|
|
|
|
- "submitted_by": extract_req.submitted_by,
|
|
|
|
|
- "timestamp": now,
|
|
|
|
|
- "session_key": extract_req.session_key
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ for item, embedding in zip(extracted_knowledge, embeddings):
|
|
|
|
|
+ task = item.get("task", "")
|
|
|
|
|
+ knowledge_content = item.get("content", "")
|
|
|
|
|
+ types = item.get("types", ["strategy"])
|
|
|
|
|
+ score = item.get("score", 3)
|
|
|
|
|
|
|
|
- eval_data = {
|
|
|
|
|
- "score": score,
|
|
|
|
|
- "helpful": 1,
|
|
|
|
|
- "harmful": 0,
|
|
|
|
|
- "confidence": 0.7,
|
|
|
|
|
- "helpful_history": [],
|
|
|
|
|
- "harmful_history": []
|
|
|
|
|
- }
|
|
|
|
|
|
|
+ if not task or not knowledge_content:
|
|
|
|
|
+ continue
|
|
|
|
|
+
|
|
|
|
|
+ # 生成 ID
|
|
|
|
|
+ timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
|
|
|
|
|
+ random_suffix = uuid.uuid4().hex[:4]
|
|
|
|
|
+ knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
|
|
|
|
|
+
|
|
|
|
|
+ # 准备数据
|
|
|
|
|
+ source = {
|
|
|
|
|
+ "name": "message_extraction",
|
|
|
|
|
+ "category": "exp",
|
|
|
|
|
+ "urls": [],
|
|
|
|
|
+ "agent_id": extract_req.agent_id,
|
|
|
|
|
+ "submitted_by": extract_req.submitted_by,
|
|
|
|
|
+ "timestamp": datetime.now(timezone.utc).isoformat(),
|
|
|
|
|
+ "session_key": extract_req.session_key
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
- # 插入数据库
|
|
|
|
|
- conn.execute(
|
|
|
|
|
- """INSERT INTO knowledge
|
|
|
|
|
- (id, message_id, types, task, tags, scopes, owner, content,
|
|
|
|
|
- source, eval, created_at, updated_at)
|
|
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
- (
|
|
|
|
|
- knowledge_id,
|
|
|
|
|
- "",
|
|
|
|
|
- json.dumps(types),
|
|
|
|
|
- task,
|
|
|
|
|
- json.dumps({}),
|
|
|
|
|
- json.dumps(["org:cybertogether"]),
|
|
|
|
|
- extract_req.submitted_by,
|
|
|
|
|
- knowledge_content,
|
|
|
|
|
- json.dumps(source, ensure_ascii=False),
|
|
|
|
|
- json.dumps(eval_data, ensure_ascii=False),
|
|
|
|
|
- now,
|
|
|
|
|
- now,
|
|
|
|
|
- ),
|
|
|
|
|
- )
|
|
|
|
|
- knowledge_ids.append(knowledge_id)
|
|
|
|
|
-
|
|
|
|
|
- conn.commit()
|
|
|
|
|
- print(f"[Extract] 成功提取并保存 {len(knowledge_ids)} 条知识")
|
|
|
|
|
-
|
|
|
|
|
- return {
|
|
|
|
|
- "status": "ok",
|
|
|
|
|
- "extracted_count": len(knowledge_ids),
|
|
|
|
|
- "knowledge_ids": knowledge_ids
|
|
|
|
|
|
|
+ eval_data = {
|
|
|
|
|
+ "score": score,
|
|
|
|
|
+ "helpful": 1,
|
|
|
|
|
+ "harmful": 0,
|
|
|
|
|
+ "confidence": 0.7,
|
|
|
|
|
+ "helpful_history": [],
|
|
|
|
|
+ "harmful_history": []
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
- finally:
|
|
|
|
|
- conn.close()
|
|
|
|
|
|
|
+ knowledge_list.append({
|
|
|
|
|
+ "id": knowledge_id,
|
|
|
|
|
+ "embedding": embedding,
|
|
|
|
|
+ "message_id": "",
|
|
|
|
|
+ "task": task,
|
|
|
|
|
+ "content": knowledge_content,
|
|
|
|
|
+ "types": types,
|
|
|
|
|
+ "tags": {},
|
|
|
|
|
+ "scopes": ["org:cybertogether"],
|
|
|
|
|
+ "owner": extract_req.submitted_by,
|
|
|
|
|
+ "resource_ids": [],
|
|
|
|
|
+ "source": source,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
|
|
+ "created_at": now,
|
|
|
|
|
+ "updated_at": now,
|
|
|
|
|
+ })
|
|
|
|
|
+ knowledge_ids.append(knowledge_id)
|
|
|
|
|
+
|
|
|
|
|
+ # 批量插入
|
|
|
|
|
+ if knowledge_list:
|
|
|
|
|
+ milvus_store.insert_batch(knowledge_list)
|
|
|
|
|
+
|
|
|
|
|
+ print(f"[Extract] 成功提取并保存 {len(knowledge_ids)} 条知识")
|
|
|
|
|
+
|
|
|
|
|
+ return {
|
|
|
|
|
+ "status": "ok",
|
|
|
|
|
+ "extracted_count": len(knowledge_ids),
|
|
|
|
|
+ "knowledge_ids": knowledge_ids
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
except json.JSONDecodeError as e:
|
|
except json.JSONDecodeError as e:
|
|
|
print(f"[Extract] JSON 解析失败: {e}")
|
|
print(f"[Extract] JSON 解析失败: {e}")
|