|
|
@@ -10,12 +10,14 @@ import re
|
|
|
import json
|
|
|
import sqlite3
|
|
|
import asyncio
|
|
|
+import base64
|
|
|
from contextlib import asynccontextmanager
|
|
|
from datetime import datetime, timezone
|
|
|
from typing import Optional
|
|
|
from pathlib import Path
|
|
|
+from cryptography.hazmat.primitives.ciphers.aead import AESGCM
|
|
|
|
|
|
-from fastapi import FastAPI, HTTPException, Query
|
|
|
+from fastapi import FastAPI, HTTPException, Query, Header
|
|
|
from fastapi.responses import HTMLResponse
|
|
|
from pydantic import BaseModel, Field
|
|
|
|
|
|
@@ -33,6 +35,15 @@ BRAND_NAME = os.getenv("BRAND_NAME", "KnowHub")
|
|
|
BRAND_API_ENV = os.getenv("BRAND_API_ENV", "KNOWHUB_API")
|
|
|
BRAND_DB = os.getenv("BRAND_DB", "knowhub.db")
|
|
|
|
|
|
+# 组织密钥配置(格式:org1:key1_base64,org2:key2_base64)
|
|
|
+ORG_KEYS_RAW = os.getenv("ORG_KEYS", "")
|
|
|
+ORG_KEYS = {}
|
|
|
+if ORG_KEYS_RAW:
|
|
|
+ for pair in ORG_KEYS_RAW.split(","):
|
|
|
+ if ":" in pair:
|
|
|
+ org, key_b64 = pair.split(":", 1)
|
|
|
+ ORG_KEYS[org.strip()] = key_b64.strip()
|
|
|
+
|
|
|
DB_PATH = Path(__file__).parent / BRAND_DB
|
|
|
|
|
|
# --- 数据库 ---
|
|
|
@@ -44,6 +55,77 @@ def get_db() -> sqlite3.Connection:
|
|
|
return conn
|
|
|
|
|
|
|
|
|
+# --- 加密/解密 ---
|
|
|
+
|
|
|
+def get_org_key(resource_id: str) -> Optional[bytes]:
|
|
|
+ """从content_id提取组织前缀,返回对应密钥"""
|
|
|
+ if "/" in resource_id:
|
|
|
+ org = resource_id.split("/")[0]
|
|
|
+ if org in ORG_KEYS:
|
|
|
+ return base64.b64decode(ORG_KEYS[org])
|
|
|
+ return None
|
|
|
+
|
|
|
+
|
|
|
+def encrypt_content(resource_id: str, plaintext: str) -> str:
|
|
|
+ """加密内容,返回格式:encrypted:AES256-GCM:{base64_data}"""
|
|
|
+ if not plaintext:
|
|
|
+ return ""
|
|
|
+
|
|
|
+ key = get_org_key(resource_id)
|
|
|
+ if not key:
|
|
|
+ # 没有配置密钥,明文存储(不推荐)
|
|
|
+ return plaintext
|
|
|
+
|
|
|
+ aesgcm = AESGCM(key)
|
|
|
+ nonce = os.urandom(12) # 96-bit nonce
|
|
|
+ ciphertext = aesgcm.encrypt(nonce, plaintext.encode("utf-8"), None)
|
|
|
+
|
|
|
+ # 组合 nonce + ciphertext
|
|
|
+ encrypted_data = nonce + ciphertext
|
|
|
+ encoded = base64.b64encode(encrypted_data).decode("ascii")
|
|
|
+
|
|
|
+ return f"encrypted:AES256-GCM:{encoded}"
|
|
|
+
|
|
|
+
|
|
|
+def decrypt_content(resource_id: str, encrypted_text: str, provided_key: Optional[str] = None) -> str:
|
|
|
+ """解密内容,如果没有提供密钥或密钥错误,返回[ENCRYPTED]"""
|
|
|
+ if not encrypted_text:
|
|
|
+ return ""
|
|
|
+
|
|
|
+ if not encrypted_text.startswith("encrypted:AES256-GCM:"):
|
|
|
+ # 未加密的内容,直接返回
|
|
|
+ return encrypted_text
|
|
|
+
|
|
|
+ # 提取加密数据
|
|
|
+ encoded = encrypted_text.split(":", 2)[2]
|
|
|
+ encrypted_data = base64.b64decode(encoded)
|
|
|
+
|
|
|
+ nonce = encrypted_data[:12]
|
|
|
+ ciphertext = encrypted_data[12:]
|
|
|
+
|
|
|
+ # 获取密钥
|
|
|
+ key = None
|
|
|
+ if provided_key:
|
|
|
+ # 使用提供的密钥
|
|
|
+ try:
|
|
|
+ key = base64.b64decode(provided_key)
|
|
|
+ except Exception:
|
|
|
+ return "[ENCRYPTED]"
|
|
|
+ else:
|
|
|
+ # 从配置中获取
|
|
|
+ key = get_org_key(resource_id)
|
|
|
+
|
|
|
+ if not key:
|
|
|
+ return "[ENCRYPTED]"
|
|
|
+
|
|
|
+ try:
|
|
|
+ aesgcm = AESGCM(key)
|
|
|
+ plaintext = aesgcm.decrypt(nonce, ciphertext, None)
|
|
|
+ return plaintext.decode("utf-8")
|
|
|
+ except Exception:
|
|
|
+ return "[ENCRYPTED]"
|
|
|
+
|
|
|
+
|
|
|
def init_db():
|
|
|
conn = get_db()
|
|
|
conn.execute("""
|
|
|
@@ -64,13 +146,17 @@ def init_db():
|
|
|
conn.execute("CREATE INDEX IF NOT EXISTS idx_name ON experiences(name)")
|
|
|
|
|
|
conn.execute("""
|
|
|
- CREATE TABLE IF NOT EXISTS contents (
|
|
|
+ CREATE TABLE IF NOT EXISTS resources (
|
|
|
id TEXT PRIMARY KEY,
|
|
|
title TEXT DEFAULT '',
|
|
|
body TEXT NOT NULL,
|
|
|
+ secure_body TEXT DEFAULT '',
|
|
|
+ content_type TEXT DEFAULT 'text',
|
|
|
+ metadata TEXT DEFAULT '{}',
|
|
|
sort_order INTEGER DEFAULT 0,
|
|
|
submitted_by TEXT DEFAULT '',
|
|
|
- created_at TEXT NOT NULL
|
|
|
+ created_at TEXT NOT NULL,
|
|
|
+ updated_at TEXT DEFAULT ''
|
|
|
)
|
|
|
""")
|
|
|
|
|
|
@@ -84,6 +170,7 @@ def init_db():
|
|
|
scopes TEXT DEFAULT '["org:cybertogether"]', -- JSON array
|
|
|
owner TEXT DEFAULT '',
|
|
|
content TEXT NOT NULL,
|
|
|
+ resource_ids TEXT DEFAULT '[]', -- JSON array: ["code/selenium/login", "credentials/website"]
|
|
|
source TEXT DEFAULT '{}', -- JSON object: {name, category, urls, agent_id, submitted_by, timestamp}
|
|
|
eval TEXT DEFAULT '{}', -- JSON object: {score, helpful, harmful, confidence, histories}
|
|
|
created_at TEXT NOT NULL,
|
|
|
@@ -101,14 +188,26 @@ def init_db():
|
|
|
|
|
|
# --- Models ---
|
|
|
|
|
|
-class ContentIn(BaseModel):
|
|
|
+class ResourceIn(BaseModel):
|
|
|
id: str
|
|
|
title: str = ""
|
|
|
body: str
|
|
|
+ secure_body: str = ""
|
|
|
+ content_type: str = "text" # text|code|credential|cookie
|
|
|
+ metadata: dict = {}
|
|
|
sort_order: int = 0
|
|
|
submitted_by: str = ""
|
|
|
|
|
|
|
|
|
+class ResourcePatchIn(BaseModel):
|
|
|
+ """PATCH /api/resource/{id} 请求体"""
|
|
|
+ title: Optional[str] = None
|
|
|
+ body: Optional[str] = None
|
|
|
+ secure_body: Optional[str] = None
|
|
|
+ content_type: Optional[str] = None
|
|
|
+ metadata: Optional[dict] = None
|
|
|
+
|
|
|
+
|
|
|
# Knowledge Models
|
|
|
class KnowledgeIn(BaseModel):
|
|
|
task: str
|
|
|
@@ -118,6 +217,7 @@ class KnowledgeIn(BaseModel):
|
|
|
scopes: list[str] = ["org:cybertogether"]
|
|
|
owner: str = ""
|
|
|
message_id: str = ""
|
|
|
+ resource_ids: list[str] = []
|
|
|
source: dict = {} # {name, category, urls, agent_id, submitted_by, timestamp}
|
|
|
eval: dict = {} # {score, helpful, harmful, confidence}
|
|
|
|
|
|
@@ -131,6 +231,7 @@ class KnowledgeOut(BaseModel):
|
|
|
scopes: list[str]
|
|
|
owner: str
|
|
|
content: str
|
|
|
+ resource_ids: list[str]
|
|
|
source: dict
|
|
|
eval: dict
|
|
|
created_at: str
|
|
|
@@ -171,19 +272,22 @@ class KnowledgeSearchResponse(BaseModel):
|
|
|
count: int
|
|
|
|
|
|
|
|
|
-class ContentNode(BaseModel):
|
|
|
+class ResourceNode(BaseModel):
|
|
|
id: str
|
|
|
title: str
|
|
|
|
|
|
|
|
|
-class ContentOut(BaseModel):
|
|
|
+class ResourceOut(BaseModel):
|
|
|
id: str
|
|
|
title: str
|
|
|
body: str
|
|
|
- toc: Optional[ContentNode] = None
|
|
|
- children: list[ContentNode]
|
|
|
- prev: Optional[ContentNode] = None
|
|
|
- next: Optional[ContentNode] = None
|
|
|
+ secure_body: str = ""
|
|
|
+ content_type: str = "text"
|
|
|
+ metadata: dict = {}
|
|
|
+ toc: Optional[ResourceNode] = None
|
|
|
+ children: list[ResourceNode]
|
|
|
+ prev: Optional[ResourceNode] = None
|
|
|
+ next: Optional[ResourceNode] = None
|
|
|
|
|
|
|
|
|
# --- App ---
|
|
|
@@ -199,75 +303,99 @@ app = FastAPI(title=BRAND_NAME, lifespan=lifespan)
|
|
|
|
|
|
# --- Knowledge API ---
|
|
|
|
|
|
-@app.post("/api/content", status_code=201)
|
|
|
-def submit_content(content: ContentIn):
|
|
|
+@app.post("/api/resource", status_code=201)
|
|
|
+def submit_resource(resource: ResourceIn):
|
|
|
conn = get_db()
|
|
|
try:
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
+
|
|
|
+ # 加密敏感内容
|
|
|
+ encrypted_secure_body = encrypt_content(resource.id, resource.secure_body)
|
|
|
+
|
|
|
conn.execute(
|
|
|
- "INSERT OR REPLACE INTO contents"
|
|
|
- "(id, title, body, sort_order, submitted_by, created_at)"
|
|
|
- " VALUES (?, ?, ?, ?, ?, ?)",
|
|
|
- (content.id, content.title, content.body, content.sort_order, content.submitted_by, now),
|
|
|
+ "INSERT OR REPLACE INTO resources"
|
|
|
+ "(id, title, body, secure_body, content_type, metadata, sort_order, submitted_by, created_at, updated_at)"
|
|
|
+ " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
|
|
|
+ (
|
|
|
+ resource.id,
|
|
|
+ resource.title,
|
|
|
+ resource.body,
|
|
|
+ encrypted_secure_body,
|
|
|
+ resource.content_type,
|
|
|
+ json.dumps(resource.metadata),
|
|
|
+ resource.sort_order,
|
|
|
+ resource.submitted_by,
|
|
|
+ now,
|
|
|
+ now,
|
|
|
+ ),
|
|
|
)
|
|
|
conn.commit()
|
|
|
- return {"status": "ok"}
|
|
|
+ return {"status": "ok", "id": resource.id}
|
|
|
finally:
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
-@app.get("/api/content/{content_id:path}", response_model=ContentOut)
|
|
|
-def get_content(content_id: str):
|
|
|
+@app.get("/api/resource/{resource_id:path}", response_model=ResourceOut)
|
|
|
+def get_resource(resource_id: str, x_org_key: Optional[str] = Header(None)):
|
|
|
conn = get_db()
|
|
|
try:
|
|
|
row = conn.execute(
|
|
|
- "SELECT id, title, body, sort_order FROM contents WHERE id = ?",
|
|
|
- (content_id,),
|
|
|
+ "SELECT id, title, body, secure_body, content_type, metadata, sort_order FROM resources WHERE id = ?",
|
|
|
+ (resource_id,),
|
|
|
).fetchone()
|
|
|
if not row:
|
|
|
- raise HTTPException(status_code=404, detail=f"Content not found: {content_id}")
|
|
|
+ raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")
|
|
|
+
|
|
|
+ # 解密敏感内容
|
|
|
+ secure_body = decrypt_content(resource_id, row["secure_body"] or "", x_org_key)
|
|
|
+
|
|
|
+ # 解析metadata
|
|
|
+ metadata = json.loads(row["metadata"] or "{}")
|
|
|
|
|
|
# 计算导航上下文
|
|
|
- root_id = content_id.split("/")[0] if "/" in content_id else content_id
|
|
|
+ root_id = resource_id.split("/")[0] if "/" in resource_id else resource_id
|
|
|
|
|
|
# TOC (根节点)
|
|
|
toc = None
|
|
|
- if "/" in content_id:
|
|
|
+ if "/" in resource_id:
|
|
|
toc_row = conn.execute(
|
|
|
- "SELECT id, title FROM contents WHERE id = ?",
|
|
|
+ "SELECT id, title FROM resources WHERE id = ?",
|
|
|
(root_id,),
|
|
|
).fetchone()
|
|
|
if toc_row:
|
|
|
- toc = ContentNode(id=toc_row["id"], title=toc_row["title"])
|
|
|
+ toc = ResourceNode(id=toc_row["id"], title=toc_row["title"])
|
|
|
|
|
|
# Children (子节点)
|
|
|
children = []
|
|
|
children_rows = conn.execute(
|
|
|
- "SELECT id, title FROM contents WHERE id LIKE ? AND id != ? ORDER BY sort_order",
|
|
|
- (f"{content_id}/%", content_id),
|
|
|
+ "SELECT id, title FROM resources WHERE id LIKE ? AND id != ? ORDER BY sort_order",
|
|
|
+ (f"{resource_id}/%", resource_id),
|
|
|
).fetchall()
|
|
|
- children = [ContentNode(id=r["id"], title=r["title"]) for r in children_rows]
|
|
|
+ children = [ResourceNode(id=r["id"], title=r["title"]) for r in children_rows]
|
|
|
|
|
|
# Prev/Next (同级节点)
|
|
|
prev_node = None
|
|
|
next_node = None
|
|
|
- if "/" in content_id:
|
|
|
+ if "/" in resource_id:
|
|
|
siblings = conn.execute(
|
|
|
- "SELECT id, title, sort_order FROM contents WHERE id LIKE ? AND id NOT LIKE ? ORDER BY sort_order",
|
|
|
+ "SELECT id, title, sort_order FROM resources WHERE id LIKE ? AND id NOT LIKE ? ORDER BY sort_order",
|
|
|
(f"{root_id}/%", f"{root_id}/%/%"),
|
|
|
).fetchall()
|
|
|
for i, sib in enumerate(siblings):
|
|
|
- if sib["id"] == content_id:
|
|
|
+ if sib["id"] == resource_id:
|
|
|
if i > 0:
|
|
|
- prev_node = ContentNode(id=siblings[i-1]["id"], title=siblings[i-1]["title"])
|
|
|
+ prev_node = ResourceNode(id=siblings[i-1]["id"], title=siblings[i-1]["title"])
|
|
|
if i < len(siblings) - 1:
|
|
|
- next_node = ContentNode(id=siblings[i+1]["id"], title=siblings[i+1]["title"])
|
|
|
+ next_node = ResourceNode(id=siblings[i+1]["id"], title=siblings[i+1]["title"])
|
|
|
break
|
|
|
|
|
|
- return ContentOut(
|
|
|
+ return ResourceOut(
|
|
|
id=row["id"],
|
|
|
title=row["title"],
|
|
|
body=row["body"],
|
|
|
+ secure_body=secure_body,
|
|
|
+ content_type=row["content_type"],
|
|
|
+ metadata=metadata,
|
|
|
toc=toc,
|
|
|
children=children,
|
|
|
prev=prev_node,
|
|
|
@@ -277,6 +405,97 @@ def get_content(content_id: str):
|
|
|
conn.close()
|
|
|
|
|
|
|
|
|
+@app.patch("/api/resource/{resource_id:path}")
|
|
|
+def patch_resource(resource_id: str, patch: ResourcePatchIn):
|
|
|
+ """更新resource字段"""
|
|
|
+ conn = get_db()
|
|
|
+ try:
|
|
|
+ # 检查是否存在
|
|
|
+ row = conn.execute("SELECT id FROM resources WHERE id = ?", (resource_id,)).fetchone()
|
|
|
+ if not row:
|
|
|
+ raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")
|
|
|
+
|
|
|
+ # 构建更新语句
|
|
|
+ updates = []
|
|
|
+ params = []
|
|
|
+
|
|
|
+ if patch.title is not None:
|
|
|
+ updates.append("title = ?")
|
|
|
+ params.append(patch.title)
|
|
|
+
|
|
|
+ if patch.body is not None:
|
|
|
+ updates.append("body = ?")
|
|
|
+ params.append(patch.body)
|
|
|
+
|
|
|
+ if patch.secure_body is not None:
|
|
|
+ encrypted = encrypt_content(resource_id, patch.secure_body)
|
|
|
+ updates.append("secure_body = ?")
|
|
|
+ params.append(encrypted)
|
|
|
+
|
|
|
+ if patch.content_type is not None:
|
|
|
+ updates.append("content_type = ?")
|
|
|
+ params.append(patch.content_type)
|
|
|
+
|
|
|
+ if patch.metadata is not None:
|
|
|
+ updates.append("metadata = ?")
|
|
|
+ params.append(json.dumps(patch.metadata))
|
|
|
+
|
|
|
+ if not updates:
|
|
|
+ return {"status": "ok", "message": "No fields to update"}
|
|
|
+
|
|
|
+ # 添加updated_at
|
|
|
+ updates.append("updated_at = ?")
|
|
|
+ params.append(datetime.now(timezone.utc).isoformat())
|
|
|
+
|
|
|
+ # 执行更新
|
|
|
+ params.append(resource_id)
|
|
|
+ sql = f"UPDATE resources SET {', '.join(updates)} WHERE id = ?"
|
|
|
+ conn.execute(sql, params)
|
|
|
+ conn.commit()
|
|
|
+
|
|
|
+ return {"status": "ok", "id": resource_id}
|
|
|
+ finally:
|
|
|
+ conn.close()
|
|
|
+
|
|
|
+
|
|
|
+@app.get("/api/resource")
|
|
|
+def list_resources(
|
|
|
+ content_type: Optional[str] = Query(None),
|
|
|
+ limit: int = Query(100, ge=1, le=1000)
|
|
|
+):
|
|
|
+ """列出所有resource"""
|
|
|
+ conn = get_db()
|
|
|
+ try:
|
|
|
+ sql = "SELECT id, title, content_type, metadata, created_at FROM resources"
|
|
|
+ params = []
|
|
|
+
|
|
|
+ if content_type:
|
|
|
+ sql += " WHERE content_type = ?"
|
|
|
+ params.append(content_type)
|
|
|
+
|
|
|
+ sql += " ORDER BY id LIMIT ?"
|
|
|
+ params.append(limit)
|
|
|
+
|
|
|
+ rows = conn.execute(sql, params).fetchall()
|
|
|
+
|
|
|
+ results = []
|
|
|
+ for row in rows:
|
|
|
+ results.append({
|
|
|
+ "id": row["id"],
|
|
|
+ "title": row["title"],
|
|
|
+ "content_type": row["content_type"],
|
|
|
+ "metadata": json.loads(row["metadata"] or "{}"),
|
|
|
+ "created_at": row["created_at"],
|
|
|
+ })
|
|
|
+
|
|
|
+ return {"results": results, "count": len(results)}
|
|
|
+ finally:
|
|
|
+ conn.close()
|
|
|
+
|
|
|
+
|
|
|
+# --- Knowledge API ---
|
|
|
+
|
|
|
+
|
|
|
# ===== Knowledge API =====
|
|
|
|
|
|
# 两阶段检索逻辑
|
|
|
@@ -313,7 +532,7 @@ async def _route_knowledge_by_llm(query_text: str, metadata_list: list[dict], k:
|
|
|
|
|
|
response = await openrouter_llm_call(
|
|
|
messages=[{"role": "user", "content": prompt}],
|
|
|
- model="google/gemini-2.0-flash-001"
|
|
|
+ model="google/gemini-2.5-flash-lite"
|
|
|
)
|
|
|
|
|
|
content = response.get("content", "").strip()
|
|
|
@@ -521,8 +740,8 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
conn.execute(
|
|
|
"""INSERT INTO knowledge
|
|
|
(id, message_id, types, task, tags, scopes, owner, content,
|
|
|
- source, eval, created_at, updated_at)
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
+ resource_ids, source, eval, created_at, updated_at)
|
|
|
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
(
|
|
|
knowledge_id,
|
|
|
knowledge.message_id,
|
|
|
@@ -532,6 +751,7 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
json.dumps(knowledge.scopes),
|
|
|
owner,
|
|
|
knowledge.content,
|
|
|
+ json.dumps(knowledge.resource_ids),
|
|
|
json.dumps(source),
|
|
|
json.dumps(eval_data),
|
|
|
now,
|
|
|
@@ -655,6 +875,7 @@ def get_knowledge(knowledge_id: str):
|
|
|
"scopes": json.loads(row["scopes"]),
|
|
|
"owner": row["owner"],
|
|
|
"content": row["content"],
|
|
|
+ "resource_ids": json.loads(row["resource_ids"]),
|
|
|
"source": json.loads(row["source"]),
|
|
|
"eval": json.loads(row["eval"]),
|
|
|
"created_at": row["created_at"],
|
|
|
@@ -683,7 +904,7 @@ async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
|
|
|
try:
|
|
|
response = await openrouter_llm_call(
|
|
|
messages=[{"role": "user", "content": prompt}],
|
|
|
- model="google/gemini-2.0-flash-001"
|
|
|
+ model="google/gemini-2.5-flash-lite"
|
|
|
)
|
|
|
evolved = response.get("content", "").strip()
|
|
|
if len(evolved) < 5:
|
|
|
@@ -854,7 +1075,7 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
|
|
|
|
|
|
@app.post("/api/knowledge/slim")
|
|
|
-async def slim_knowledge(model: str = "google/gemini-2.0-flash-001"):
|
|
|
+async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
|
|
|
"""知识库瘦身:合并语义相似知识"""
|
|
|
conn = get_db()
|
|
|
try:
|
|
|
@@ -1084,7 +1305,7 @@ async def extract_knowledge_from_messages(extract_req: MessageExtractIn):
|
|
|
|
|
|
response = await openrouter_llm_call(
|
|
|
messages=[{"role": "user", "content": prompt}],
|
|
|
- model="google/gemini-2.0-flash-001"
|
|
|
+ model="google/gemini-2.5-flash-lite"
|
|
|
)
|
|
|
|
|
|
content = response.get("content", "").strip()
|