| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
7771778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964 |
- """
- KnowHub Server
- Agent 工具使用经验的共享平台。
- FastAPI + Milvus Lite(知识)+ SQLite(资源),单文件部署。
- """
- import os
- import re
- import json
- import sqlite3
- import asyncio
- import base64
- import time
- import uuid
- from contextlib import asynccontextmanager
- from datetime import datetime, timezone
- from typing import Optional, List
- from pathlib import Path
- from cryptography.hazmat.primitives.ciphers.aead import AESGCM
- from fastapi import FastAPI, HTTPException, Query, Header, Body
- from fastapi.responses import HTMLResponse
- from pydantic import BaseModel, Field
- # 导入 LLM 调用(需要 agent 模块在 Python path 中)
- import sys
- sys.path.insert(0, str(Path(__file__).parent.parent))
- # 加载环境变量
- from dotenv import load_dotenv
- load_dotenv(Path(__file__).parent.parent / ".env")
- from agent.llm.openrouter import openrouter_llm_call
- # 导入向量存储和 embedding
- from knowhub.vector_store import MilvusStore
- from knowhub.embeddings import get_embedding, get_embeddings_batch
# Branding values; each one can be overridden through the environment.
BRAND_NAME = os.environ.get("BRAND_NAME", "KnowHub")
BRAND_API_ENV = os.environ.get("BRAND_API_ENV", "KNOWHUB_API")
BRAND_DB = os.environ.get("BRAND_DB", "knowhub.db")

# Organisation keys, configured as "org1:key1_base64,org2:key2_base64".
# Entries without a ":" are ignored; whitespace around both parts is trimmed.
ORG_KEYS_RAW = os.environ.get("ORG_KEYS", "")
ORG_KEYS = {}
if ORG_KEYS_RAW:
    for pair in ORG_KEYS_RAW.split(","):
        if ":" not in pair:
            continue
        # Split on the first ":" only, so the key part may itself contain ":".
        org, key_b64 = pair.split(":", 1)
        ORG_KEYS[org.strip()] = key_b64.strip()
# Storage locations, both placed in the directory that holds this source file.
DB_PATH = Path(__file__).parent.joinpath(BRAND_DB)
MILVUS_DATA_DIR = Path(__file__).parent.joinpath("milvus_data")

# Process-wide Milvus store instance; assigned by lifespan() at start-up.
milvus_store: Optional[MilvusStore] = None
- # --- 数据库 ---
def get_db() -> sqlite3.Connection:
    """Open a new SQLite connection to DB_PATH.

    Rows are returned as ``sqlite3.Row`` (name-addressable) and the journal
    mode is switched to WAL. The caller is responsible for closing the
    connection.
    """
    connection = sqlite3.connect(str(DB_PATH))
    connection.row_factory = sqlite3.Row
    connection.execute("PRAGMA journal_mode=WAL")
    return connection
- # --- 加密/解密 ---
def get_org_key(resource_id: str) -> Optional[bytes]:
    """Return the decoded key for the organisation that prefixes ``resource_id``.

    The organisation is the text before the first "/" of the id. ``None`` is
    returned when the id contains no "/" or the organisation has no entry in
    ORG_KEYS.
    """
    org, slash, _rest = resource_id.partition("/")
    if slash and org in ORG_KEYS:
        return base64.b64decode(ORG_KEYS[org])
    return None
def encrypt_content(resource_id: str, plaintext: str) -> str:
    """Encrypt ``plaintext`` with the key of the organisation owning ``resource_id``.

    Returns ``""`` for empty input, the plaintext unchanged when no key is
    configured for the organisation (stored unencrypted, not recommended),
    and otherwise ``encrypted:AES256-GCM:{base64(nonce + ciphertext)}``.
    """
    if not plaintext:
        return ""

    org_key = get_org_key(resource_id)
    if not org_key:
        # No key configured: fall back to storing the value as-is.
        return plaintext

    nonce = os.urandom(12)  # 96-bit nonce, generated fresh for every call
    sealed = AESGCM(org_key).encrypt(nonce, plaintext.encode("utf-8"), None)
    # The nonce is prepended so decrypt_content can recover it from the payload.
    payload = base64.b64encode(nonce + sealed).decode("ascii")
    return f"encrypted:AES256-GCM:{payload}"
def decrypt_content(resource_id: str, encrypted_text: str, provided_key: Optional[str] = None) -> str:
    """Decrypt a value produced by ``encrypt_content``.

    Args:
        resource_id: Resource id; its organisation prefix selects the
            configured key when ``provided_key`` is not given.
        encrypted_text: Stored value. Text without the
            ``encrypted:AES256-GCM:`` prefix is treated as unencrypted.
        provided_key: Optional base64-encoded key that takes precedence over
            the configured organisation key.

    Returns:
        ``""`` for empty input, the input itself when it is not encrypted,
        the decrypted text on success, and the placeholder ``"[ENCRYPTED]"``
        when the payload is malformed, no key is available, or decryption
        fails.
    """
    if not encrypted_text:
        return ""

    marker = "encrypted:AES256-GCM:"
    if not encrypted_text.startswith(marker):
        # Not encrypted: return as stored.
        return encrypted_text

    # A corrupted payload must yield the placeholder like every other failure
    # path, not an unhandled exception from the base64 decoder.
    try:
        encrypted_data = base64.b64decode(encrypted_text[len(marker):])
    except ValueError:  # binascii.Error is a ValueError subclass
        return "[ENCRYPTED]"

    # Payload layout: 12-byte nonce followed by the ciphertext.
    nonce = encrypted_data[:12]
    ciphertext = encrypted_data[12:]

    key = None
    if provided_key:
        # A caller-supplied key wins over the configured one.
        try:
            key = base64.b64decode(provided_key)
        except Exception:
            return "[ENCRYPTED]"
    else:
        key = get_org_key(resource_id)

    if not key:
        return "[ENCRYPTED]"

    try:
        aesgcm = AESGCM(key)
        plaintext = aesgcm.decrypt(nonce, ciphertext, None)
        return plaintext.decode("utf-8")
    except Exception:
        # Wrong key, tampered data or invalid key length.
        return "[ENCRYPTED]"
def serialize_milvus_result(data):
    """Recursively convert a Milvus result into plain serializable values.

    Conversion rules, tried in order:
      1. ``None``, ``str``, ``int``, ``float`` and ``bool`` are returned as-is.
      2. Dicts are rebuilt with converted values.
      3. Lists and tuples become lists of converted items.
      4. Objects with a callable ``to_dict`` are converted through it.
      5. Other iterables (except ``str``/``bytes``/``dict``) become lists.
      6. Objects with ``__dict__`` are converted through ``vars``.
      7. Anything else becomes ``None``.

    A failing strategy falls through to the next one. Only ``Exception`` is
    caught, so ``KeyboardInterrupt`` and ``SystemExit`` propagate.
    """
    if data is None or isinstance(data, (str, int, float, bool)):
        return data

    if isinstance(data, dict):
        return {k: serialize_milvus_result(v) for k, v in data.items()}

    if isinstance(data, (list, tuple)):
        return [serialize_milvus_result(item) for item in data]

    # Objects exposing to_dict().
    if hasattr(data, 'to_dict') and callable(getattr(data, 'to_dict')):
        try:
            return serialize_milvus_result(data.to_dict())
        except Exception:
            pass  # fall through to the next strategy

    # Generic iterables (e.g. RepeatedScalarContainer).
    if hasattr(data, '__iter__') and not isinstance(data, (str, bytes, dict)):
        try:
            return [serialize_milvus_result(item) for item in data]
        except Exception:
            pass  # fall through to the next strategy

    # Plain objects: use their attribute dictionary.
    if hasattr(data, '__dict__'):
        try:
            return serialize_milvus_result(vars(data))
        except Exception:
            pass

    # Final fallback: None rather than a string representation.
    return None
def init_db():
    """Create the SQLite schema if it does not exist yet.

    Creates the ``experiences`` table (plus an index on ``name``) and the
    ``resources`` table, then commits. The connection is closed even when one
    of the statements fails.
    """
    conn = get_db()
    try:
        conn.execute("""
            CREATE TABLE IF NOT EXISTS experiences (
                id INTEGER PRIMARY KEY AUTOINCREMENT,
                name TEXT NOT NULL,
                url TEXT DEFAULT '',
                category TEXT DEFAULT '',
                task TEXT NOT NULL,
                score INTEGER CHECK(score BETWEEN 1 AND 5),
                outcome TEXT DEFAULT '',
                tips TEXT DEFAULT '',
                content_id TEXT DEFAULT '',
                submitted_by TEXT DEFAULT '',
                created_at TEXT NOT NULL
            )
        """)
        conn.execute("CREATE INDEX IF NOT EXISTS idx_name ON experiences(name)")
        conn.execute("""
            CREATE TABLE IF NOT EXISTS resources (
                id TEXT PRIMARY KEY,
                title TEXT DEFAULT '',
                body TEXT NOT NULL,
                secure_body TEXT DEFAULT '',
                content_type TEXT DEFAULT 'text',
                metadata TEXT DEFAULT '{}',
                sort_order INTEGER DEFAULT 0,
                submitted_by TEXT DEFAULT '',
                created_at TEXT NOT NULL,
                updated_at TEXT DEFAULT ''
            )
        """)
        conn.commit()
    finally:
        # Release the connection on both success and failure.
        conn.close()
- # --- Models ---
class ResourceIn(BaseModel):
    """Request body of POST /api/resource."""

    id: str
    title: str = ""
    body: str
    # Sensitive part; submit_resource passes it through encrypt_content.
    secure_body: str = ""
    content_type: str = "text"  # text|code|credential|cookie
    metadata: dict = {}
    sort_order: int = 0
    submitted_by: str = ""
class ResourcePatchIn(BaseModel):
    """Request body of PATCH /api/resource/{id}; fields left as None are not updated."""

    title: Optional[str] = None
    body: Optional[str] = None
    secure_body: Optional[str] = None
    content_type: Optional[str] = None
    metadata: Optional[dict] = None
- # Knowledge Models
class KnowledgeIn(BaseModel):
    """Request body of POST /api/knowledge."""

    task: str
    content: str
    types: list[str] = ["strategy"]
    tags: dict = {}
    scopes: list[str] = ["org:cybertogether"]
    owner: str = ""
    message_id: str = ""
    resource_ids: list[str] = []
    source: dict = {}  # {name, category, urls, agent_id, submitted_by, timestamp}
    eval: dict = {}  # {score, helpful, harmful, confidence}
class KnowledgeOut(BaseModel):
    """Full knowledge record, including identifiers and timestamps."""

    id: str
    message_id: str
    types: list[str]
    task: str
    tags: dict
    scopes: list[str]
    owner: str
    content: str
    resource_ids: list[str]
    source: dict
    eval: dict
    created_at: str
    updated_at: str
class KnowledgeUpdateIn(BaseModel):
    """Request body of PUT /api/knowledge/{id} (evaluation update / evolution)."""

    add_helpful_case: Optional[dict] = None
    add_harmful_case: Optional[dict] = None
    # Validated to the range 1..5 when supplied.
    update_score: Optional[int] = Field(default=None, ge=1, le=5)
    evolve_feedback: Optional[str] = None
class KnowledgePatchIn(BaseModel):
    """Request body of PATCH /api/knowledge/{id} (direct field editing)."""

    task: Optional[str] = None
    content: Optional[str] = None
    types: Optional[list[str]] = None
    tags: Optional[dict] = None
    scopes: Optional[list[str]] = None
    owner: Optional[str] = None
class MessageExtractIn(BaseModel):
    """Request body of POST /api/extract (extraction from a message history)."""

    messages: list[dict]  # [{role: str, content: str}, ...]
    agent_id: str = "unknown"
    submitted_by: str  # required; used as the owner
    session_key: str = ""
class KnowledgeBatchUpdateIn(BaseModel):
    """Request body of POST /api/knowledge/batch_update."""

    # One dict per feedback item.
    feedback_list: list[dict]
class KnowledgeSearchResponse(BaseModel):
    """Search response shape: the matching records and how many there are."""

    results: list[dict]
    count: int
class ResourceNode(BaseModel):
    """Lightweight reference to a resource (id and title) used for navigation links."""

    id: str
    title: str
class ResourceOut(BaseModel):
    """Response of GET /api/resource/{id}: the resource plus navigation context."""

    id: str
    title: str
    body: str
    secure_body: str = ""
    content_type: str = "text"
    metadata: dict = {}
    # Navigation context filled in by get_resource.
    toc: Optional[ResourceNode] = None
    children: list[ResourceNode]
    prev: Optional[ResourceNode] = None
    next: Optional[ResourceNode] = None
- # --- App ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook: prepare both stores before requests are served."""
    global milvus_store
    # SQLite schema (resources).
    init_db()
    # Milvus Lite store (knowledge).
    milvus_store = MilvusStore(data_dir=str(MILVUS_DATA_DIR))
    yield
    # Nothing to tear down explicitly here.


app = FastAPI(title=BRAND_NAME, lifespan=lifespan)
# --- Resource API ---
@app.post("/api/resource", status_code=201)
def submit_resource(resource: ResourceIn):
    """Insert a resource, replacing any existing row with the same id.

    ``secure_body`` is passed through ``encrypt_content`` before storage;
    ``created_at`` and ``updated_at`` are both set to the current UTC time.
    """
    conn = get_db()
    try:
        now = datetime.now(timezone.utc).isoformat()
        # Encrypt the sensitive part before it reaches the database.
        stored_secure_body = encrypt_content(resource.id, resource.secure_body)
        row = (
            resource.id,
            resource.title,
            resource.body,
            stored_secure_body,
            resource.content_type,
            json.dumps(resource.metadata),
            resource.sort_order,
            resource.submitted_by,
            now,
            now,
        )
        conn.execute(
            "INSERT OR REPLACE INTO resources"
            "(id, title, body, secure_body, content_type, metadata, sort_order, submitted_by, created_at, updated_at)"
            " VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)",
            row,
        )
        conn.commit()
        return {"status": "ok", "id": resource.id}
    finally:
        conn.close()
def _escape_like(value: str) -> str:
    """Backslash-escape the LIKE wildcards (% and _) and the escape character itself."""
    return value.replace("\\", "\\\\").replace("%", "\\%").replace("_", "\\_")


@app.get("/api/resource/{resource_id:path}", response_model=ResourceOut)
def get_resource(resource_id: str, x_org_key: Optional[str] = Header(None)):
    """Return one resource together with its navigation context.

    Args:
        resource_id: Full resource id; "/" separates hierarchy levels.
        x_org_key: Optional base64 key header forwarded to ``decrypt_content``.

    Returns:
        ``ResourceOut`` with the decrypted ``secure_body`` (or the placeholder
        produced by ``decrypt_content``), the root node as ``toc``, the
        descendants as ``children`` and the neighbouring first-level entries
        as ``prev``/``next``.

    Raises:
        HTTPException: 404 when no resource has this id.
    """
    conn = get_db()
    try:
        row = conn.execute(
            "SELECT id, title, body, secure_body, content_type, metadata, sort_order FROM resources WHERE id = ?",
            (resource_id,),
        ).fetchone()
        if not row:
            raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")

        secure_body = decrypt_content(resource_id, row["secure_body"] or "", x_org_key)
        metadata = json.loads(row["metadata"] or "{}")

        is_nested = "/" in resource_id
        root_id = resource_id.split("/")[0] if is_nested else resource_id

        # TOC: the root node, only reported for nested resources.
        toc = None
        if is_nested:
            toc_row = conn.execute(
                "SELECT id, title FROM resources WHERE id = ?",
                (root_id,),
            ).fetchone()
            if toc_row:
                toc = ResourceNode(id=toc_row["id"], title=toc_row["title"])

        # Children: every id below "<resource_id>/". The id is escaped so that
        # "_" and "%" inside it match literally instead of acting as wildcards.
        children_rows = conn.execute(
            "SELECT id, title FROM resources WHERE id LIKE ? ESCAPE '\\' AND id != ? ORDER BY sort_order",
            (f"{_escape_like(resource_id)}/%", resource_id),
        ).fetchall()
        children = [ResourceNode(id=r["id"], title=r["title"]) for r in children_rows]

        # Prev/next: neighbours among the entries directly below the root.
        prev_node = None
        next_node = None
        if is_nested:
            root_pattern = _escape_like(root_id)
            siblings = conn.execute(
                "SELECT id, title, sort_order FROM resources"
                " WHERE id LIKE ? ESCAPE '\\' AND id NOT LIKE ? ESCAPE '\\' ORDER BY sort_order",
                (f"{root_pattern}/%", f"{root_pattern}/%/%"),
            ).fetchall()
            for i, sib in enumerate(siblings):
                if sib["id"] == resource_id:
                    if i > 0:
                        prev_node = ResourceNode(id=siblings[i-1]["id"], title=siblings[i-1]["title"])
                    if i < len(siblings) - 1:
                        next_node = ResourceNode(id=siblings[i+1]["id"], title=siblings[i+1]["title"])
                    break

        return ResourceOut(
            id=row["id"],
            title=row["title"],
            body=row["body"],
            secure_body=secure_body,
            content_type=row["content_type"],
            metadata=metadata,
            toc=toc,
            children=children,
            prev=prev_node,
            next=next_node,
        )
    finally:
        conn.close()
@app.patch("/api/resource/{resource_id:path}")
def patch_resource(resource_id: str, patch: ResourcePatchIn):
    """Update the supplied fields of an existing resource.

    Fields left as ``None`` are untouched; ``secure_body`` is re-encrypted and
    ``updated_at`` is refreshed whenever at least one field changes.

    Raises:
        HTTPException: 404 when no resource has this id.
    """
    conn = get_db()
    try:
        found = conn.execute("SELECT id FROM resources WHERE id = ?", (resource_id,)).fetchone()
        if not found:
            raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")

        # (column, new value) pairs in the order they appear in the SET clause.
        changes = []
        if patch.title is not None:
            changes.append(("title", patch.title))
        if patch.body is not None:
            changes.append(("body", patch.body))
        if patch.secure_body is not None:
            changes.append(("secure_body", encrypt_content(resource_id, patch.secure_body)))
        if patch.content_type is not None:
            changes.append(("content_type", patch.content_type))
        if patch.metadata is not None:
            changes.append(("metadata", json.dumps(patch.metadata)))

        if not changes:
            return {"status": "ok", "message": "No fields to update"}

        changes.append(("updated_at", datetime.now(timezone.utc).isoformat()))

        # Column names come from the fixed list above; values are bound as parameters.
        set_clause = ", ".join(f"{column} = ?" for column, _ in changes)
        sql = f"UPDATE resources SET {set_clause} WHERE id = ?"
        params = [value for _, value in changes]
        params.append(resource_id)
        conn.execute(sql, params)
        conn.commit()
        return {"status": "ok", "id": resource_id}
    finally:
        conn.close()
@app.get("/api/resource")
def list_resources(
    content_type: Optional[str] = Query(None),
    limit: int = Query(100, ge=1, le=1000)
):
    """List resources ordered by id, optionally restricted to one content type."""
    conn = get_db()
    try:
        where_clause = " WHERE content_type = ?" if content_type else ""
        params = [content_type] if content_type else []
        params.append(limit)
        sql = (
            "SELECT id, title, content_type, metadata, created_at FROM resources"
            + where_clause
            + " ORDER BY id LIMIT ?"
        )
        results = [
            {
                "id": row["id"],
                "title": row["title"],
                "content_type": row["content_type"],
                "metadata": json.loads(row["metadata"] or "{}"),
                "created_at": row["created_at"],
            }
            for row in conn.execute(sql, params).fetchall()
        ]
        return {"results": results, "count": len(results)}
    finally:
        conn.close()
- # --- Knowledge API ---
- # ===== Knowledge API =====
async def _llm_rerank(query: str, candidates: list[dict], top_k: int) -> list[str]:
    """Ask the LLM to pick the most relevant candidates for ``query``.

    Args:
        query: Query text.
        candidates: Candidate records; each is read for ``id``, ``task`` and
            ``content`` (only the first 200 characters of ``content`` are sent).
        top_k: Maximum number of ids to return.

    Returns:
        Up to ``top_k`` distinct ids, in the order given by the LLM. Only
        tokens starting with "knowledge-" or "research-" are accepted. An
        empty list is returned for empty input or on any failure, which the
        caller treats as "fall back to vector order".
    """
    if not candidates:
        return []

    try:
        # Prompt construction sits inside the try block so that a candidate
        # with a missing field also ends in the empty-list fallback.
        candidates_text = "\n".join([
            f"[{i+1}] ID: {c['id']}\nTask: {c['task']}\nContent: {c['content'][:200]}..."
            for i, c in enumerate(candidates)
        ])
        prompt = f"""你是知识检索专家。根据用户查询,从候选知识中选出最相关的 {top_k} 条。
用户查询:"{query}"
候选知识:
{candidates_text}
请输出最相关的 {top_k} 个知识 ID,按相关性从高到低排序,用逗号分隔。
只输出 ID,不要其他内容。"""
        response = await openrouter_llm_call(
            messages=[{"role": "user", "content": prompt}],
            model="google/gemini-2.5-flash-lite"
        )
        content = response.get("content", "").strip()
        # Split on commas/whitespace and keep only tokens that look like ids.
        selected_ids = [
            token.strip()
            for token in re.split(r'[,\s]+', content)
            if token.strip().startswith(("knowledge-", "research-"))
        ]
        # dict.fromkeys removes repeated ids while keeping first-seen order, so
        # a repeated id cannot produce duplicate results or crowd out others.
        return list(dict.fromkeys(selected_ids))[:top_k]
    except Exception as e:
        print(f"[LLM Rerank] 失败: {e}")
        return []
@app.get("/api/knowledge/search")
async def search_knowledge_api(
    q: str = Query(..., description="查询文本"),
    top_k: int = Query(default=5, ge=1, le=20),
    min_score: int = Query(default=3, ge=1, le=5),
    types: Optional[str] = None,
    owner: Optional[str] = None
):
    """Search knowledge: vector recall followed by LLM reranking.

    Args:
        q: Query text.
        top_k: Number of results to return.
        min_score: Minimum ``eval["score"]`` a record must have.
        types: Comma-separated types; a record must contain all of them.
        owner: Exact owner to match.

    Returns:
        ``{"results", "count", "reranked"}``; ``reranked`` is False when the
        LLM step produced nothing and the vector order is used instead.

    Raises:
        HTTPException: 500 on any failure.
    """
    def _quoted(value: str) -> str:
        # Escape backslashes and double quotes so caller-supplied text cannot
        # close the string literal and change the filter expression.
        return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'

    try:
        # 1. Embed the query.
        query_embedding = await get_embedding(q)

        # 2. Build the filter expression.
        filters = []
        if types:
            type_list = [t.strip() for t in types.split(',') if t.strip()]
            for t in type_list:
                filters.append(f'array_contains(types, {_quoted(t)})')
        if owner:
            filters.append(f'owner == {_quoted(owner)}')
        # min_score is an int validated by Query, so it can be embedded directly.
        filters.append(f'eval["score"] >= {min_score}')
        filter_expr = ' and '.join(filters)

        # 3. Vector recall of 3 * top_k candidates.
        candidates = milvus_store.search(
            query_embedding=query_embedding,
            filters=filter_expr,
            limit=top_k * 3
        )
        if not candidates:
            return {"results": [], "count": 0, "reranked": False}

        serialized_candidates = [serialize_milvus_result(c) for c in candidates]

        # 4. LLM rerank.
        reranked_ids = await _llm_rerank(q, serialized_candidates, top_k)
        if reranked_ids:
            id_to_candidate = {c["id"]: c for c in serialized_candidates}
            # Ids the LLM invented (not among the candidates) are dropped.
            results = [id_to_candidate[kid] for kid in reranked_ids if kid in id_to_candidate]
            return {"results": results, "count": len(results), "reranked": True}

        # Fallback: plain vector order.
        print(f"[Knowledge Search] LLM 精排失败,fallback 到向量 top-{top_k}")
        top_results = serialized_candidates[:top_k]
        return {"results": top_results, "count": len(top_results), "reranked": False}
    except Exception as e:
        print(f"[Knowledge Search] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/knowledge", status_code=201)
async def save_knowledge(knowledge: KnowledgeIn):
    """Store a new knowledge record in Milvus and return its generated id.

    The id has the form ``knowledge-<YYYYmmdd-HHMMSS>-<4 hex chars>``. The
    embedding is computed from ``task`` only.

    Raises:
        HTTPException: 500 on any failure.
    """
    try:
        id_stamp = datetime.now().strftime('%Y%m%d-%H%M%S')
        id_suffix = uuid.uuid4().hex[:4]
        knowledge_id = f"knowledge-{id_stamp}-{id_suffix}"
        now = int(time.time())

        given_source = knowledge.source
        given_eval = knowledge.eval

        # Owner falls back to "agent:<agent_id>" when none is supplied.
        owner = knowledge.owner or f"agent:{given_source.get('agent_id', 'unknown')}"

        source = {
            "name": given_source.get("name", ""),
            "category": given_source.get("category", ""),
            "urls": given_source.get("urls", []),
            "agent_id": given_source.get("agent_id", "unknown"),
            "submitted_by": given_source.get("submitted_by", ""),
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "message_id": knowledge.message_id
        }

        eval_data = {
            "score": given_eval.get("score", 3),
            "helpful": given_eval.get("helpful", 1),
            "harmful": given_eval.get("harmful", 0),
            "confidence": given_eval.get("confidence", 0.5),
            "helpful_history": [],
            "harmful_history": []
        }

        # Only the task is embedded: searches describe the task scenario.
        embedding = await get_embedding(knowledge.task)

        # Tag names are stored separately so they can be filtered on.
        tag_keys = list(knowledge.tags) if isinstance(knowledge.tags, dict) else []

        insert_data = {
            "id": knowledge_id,
            "embedding": embedding,
            "message_id": knowledge.message_id,
            "task": knowledge.task,
            "content": knowledge.content,
            "types": knowledge.types,
            "tags": knowledge.tags,
            "tag_keys": tag_keys,
            "scopes": knowledge.scopes,
            "owner": owner,
            "resource_ids": knowledge.resource_ids,
            "source": source,
            "eval": eval_data,
            "created_at": now,
            "updated_at": now,
        }

        # Log everything except the embedding vector.
        loggable = json.dumps(
            {k: v for k, v in insert_data.items() if k != 'embedding'},
            ensure_ascii=False,
        )
        print(f"[Save Knowledge] 插入数据: {loggable}")

        milvus_store.insert(insert_data)
        return {"status": "ok", "knowledge_id": knowledge_id}
    except Exception as e:
        print(f"[Save Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/knowledge")
def list_knowledge(
    page: int = Query(default=1, ge=1),
    page_size: int = Query(default=200, ge=1, le=500),
    types: Optional[str] = None,
    scopes: Optional[str] = None,
    owner: Optional[str] = None,
    tags: Optional[str] = None
):
    """List knowledge with server-side filtering and pagination.

    Args:
        page: 1-based page number.
        page_size: Records per page.
        types: Comma-separated types; a record must contain all of them.
        scopes: A single scope the record must contain.
        owner: Substring matched against the owner with ``like``.
        tags: Comma-separated tag names; a record must carry all of them.

    Returns:
        ``{"results", "pagination": {"page", "page_size", "total", "total_pages"}}``
        with results ordered newest first. At most 10000 matching records are
        fetched before paging.

    Raises:
        HTTPException: 500 on any failure.
    """
    def _quoted(value: str) -> str:
        # Escape backslashes and double quotes so caller-supplied text cannot
        # close the string literal and change the filter expression.
        return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'

    try:
        filters = []

        # Several types are AND-ed: the record must contain every one of them.
        if types:
            type_list = [t.strip() for t in types.split(',') if t.strip()]
            for t in type_list:
                filters.append(f'array_contains(types, {_quoted(t)})')
        if scopes:
            filters.append(f'array_contains(scopes, {_quoted(scopes)})')
        if owner:
            filters.append(f'owner like {_quoted("%" + owner + "%")}')

        # Several tags are AND-ed, matched against the tag_keys array.
        if tags:
            tag_list = [t.strip() for t in tags.split(',') if t.strip()]
            for t in tag_list:
                filters.append(f'array_contains(tag_keys, {_quoted(t)})')

        # Without any filter, match every record.
        filter_expr = ' and '.join(filters) if filters else 'id != ""'

        # Fetch all matches (capped) first; paging is applied in memory below.
        max_limit = 10000
        results = milvus_store.query(filter_expr, limit=max_limit)
        serialized_results = [serialize_milvus_result(r) for r in results]

        # Newest first.
        serialized_results.sort(key=lambda x: x.get('created_at', 0), reverse=True)

        total = len(serialized_results)
        total_pages = (total + page_size - 1) // page_size  # ceiling division
        start_idx = (page - 1) * page_size
        page_results = serialized_results[start_idx:start_idx + page_size]

        return {
            "results": page_results,
            "pagination": {
                "page": page,
                "page_size": page_size,
                "total": total,
                "total_pages": total_pages
            }
        }
    except Exception as e:
        print(f"[List Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/knowledge/meta/tags")
def get_all_tags():
    """Return the sorted set of tag names used by stored knowledge (up to 10000 records scanned).

    Raises:
        HTTPException: 500 on any failure.
    """
    try:
        records = milvus_store.query('id != ""', limit=10000)
        tag_names = set()
        for record in records:
            # Convert to plain Python values before reading the tags.
            tags_dict = serialize_milvus_result(record).get("tags", {})
            if isinstance(tags_dict, dict):
                tag_names.update(tags_dict.keys())
        return {"tags": sorted(tag_names)}
    except Exception as e:
        print(f"[Get Tags] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.get("/api/knowledge/{knowledge_id}")
def get_knowledge(knowledge_id: str):
    """Return a single knowledge record in serializable form.

    Raises:
        HTTPException: 404 when the id is unknown, 500 on any other failure.
    """
    try:
        record = milvus_store.get_by_id(knowledge_id)
        if record:
            return serialize_milvus_result(record)
        raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
    except HTTPException:
        # Pass the 404 through instead of converting it into a 500.
        raise
    except Exception as e:
        print(f"[Get Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
    """Rewrite ``old_content`` with the LLM so that it incorporates ``feedback``.

    Returns the rewritten text. When the LLM call fails or its answer is
    shorter than 5 characters, the original content is returned with the
    feedback appended as a dated update note.
    """
    prompt = f"""你是一个 AI Agent 知识库管理员。请根据反馈建议,对现有的知识内容进行重写进化。
【原知识内容】:
{old_content}
【实战反馈建议】:
{feedback}
【重写要求】:
1. 融合知识:将反馈中的避坑指南、新参数或修正后的选择逻辑融入原知识,使其更具通用性和准确性。
2. 保持结构:如果原内容有特定格式(如 Markdown、代码示例等),请保持该格式。
3. 语言:简洁直接,使用中文。
4. 禁止:严禁输出任何开场白、解释语或额外的 Markdown 标题,直接返回重写后的正文。
"""
    try:
        reply = await openrouter_llm_call(
            messages=[{"role": "user", "content": prompt}],
            model="google/gemini-2.5-flash-lite"
        )
        rewritten = reply.get("content", "").strip()
        # An answer this short is treated as a failed rewrite.
        if len(rewritten) < 5:
            raise ValueError("LLM output too short")
        return rewritten
    except Exception as e:
        print(f"知识进化失败,采用追加模式回退: {e}")
        # Append-mode fallback: keep the original text and add the feedback.
        return f"{old_content}\n\n---\n[Update {datetime.now().strftime('%Y-%m-%d')}]: {feedback}"
@app.put("/api/knowledge/{knowledge_id}")
async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
    """Update the evaluation data of a knowledge record, optionally evolving its content.

    Supplying ``evolve_feedback`` rewrites the content through the LLM, counts
    as one more helpful case and regenerates the embedding.

    Raises:
        HTTPException: 404 when the id is unknown, 500 on any other failure.
    """
    try:
        existing = milvus_store.get_by_id(knowledge_id)
        if not existing:
            raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")

        eval_data = existing.get("eval", {})

        if update.update_score is not None:
            eval_data["score"] = update.update_score

        # Helpful and harmful cases are recorded the same way: bump the
        # counter and append the case to the matching history list.
        for case, counter_key, history_key in (
            (update.add_helpful_case, "helpful", "helpful_history"),
            (update.add_harmful_case, "harmful", "harmful_history"),
        ):
            if case:
                eval_data[counter_key] = eval_data.get(counter_key, 0) + 1
                eval_data.setdefault(history_key, []).append(case)

        content = existing["content"]
        evolved = bool(update.evolve_feedback)
        if evolved:
            content = await _evolve_knowledge_with_llm(content, update.evolve_feedback)
            eval_data["helpful"] = eval_data.get("helpful", 0) + 1

        updates = {
            "content": content,
            "eval": eval_data,
        }
        if evolved:
            # NOTE(review): the vector is recomputed from the unchanged task
            # text although only the content changed; patch_knowledge skips
            # re-embedding for content-only edits. Confirm this call is needed.
            updates["embedding"] = await get_embedding(existing['task'])

        milvus_store.update(knowledge_id, updates)
        return {"status": "ok", "knowledge_id": knowledge_id}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Update Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.patch("/api/knowledge/{knowledge_id}")
async def patch_knowledge(knowledge_id: str, patch: KnowledgePatchIn):
    """Directly edit fields of a knowledge record.

    Fields left as ``None`` are untouched. Changing ``tags`` also rewrites
    ``tag_keys``; changing ``task`` regenerates the embedding (a content-only
    edit does not, because the embedding is based on the task alone).

    Raises:
        HTTPException: 404 when the id is unknown, 500 on any other failure.
    """
    try:
        existing = milvus_store.get_by_id(knowledge_id)
        if not existing:
            raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")

        updates = {}
        for field_name in ("task", "content", "types", "tags", "scopes", "owner"):
            new_value = getattr(patch, field_name)
            if new_value is None:
                continue
            updates[field_name] = new_value
            if field_name == "tags":
                # Keep the filterable tag-name array in sync with the tags.
                updates["tag_keys"] = list(new_value.keys()) if isinstance(new_value, dict) else []

        if not updates:
            return {"status": "ok", "knowledge_id": knowledge_id}

        # A new task text requires a new vector.
        if "task" in updates:
            task = updates.get("task", existing["task"])
            updates["embedding"] = await get_embedding(task)

        milvus_store.update(knowledge_id, updates)
        return {"status": "ok", "knowledge_id": knowledge_id}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Patch Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.delete("/api/knowledge/{knowledge_id}")
def delete_knowledge(knowledge_id: str):
    """Delete a single knowledge record.

    Raises:
        HTTPException: 404 when the id is unknown, 500 on any other failure.
    """
    try:
        # Only ids that resolve to a stored record reach the delete call.
        if not milvus_store.get_by_id(knowledge_id):
            raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")

        delete_expr = f'id == "{knowledge_id}"'
        milvus_store.collection.delete(expr=delete_expr)
        print(f"[Delete Knowledge] 已删除知识: {knowledge_id}")
        return {"status": "ok", "knowledge_id": knowledge_id}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Delete Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/knowledge/batch_delete")
def batch_delete_knowledge(knowledge_ids: List[str] = Body(...)):
    """Delete several knowledge records in one call.

    Args:
        knowledge_ids: Ids to delete (JSON array in the request body).

    Returns:
        ``{"status": "ok", "deleted_count": n}`` where ``n`` is the number of
        ids requested, not the number of records actually removed.

    Raises:
        HTTPException: 400 for an empty list, 500 on any other failure.
    """
    def _quoted(value: str) -> str:
        # Escape backslashes and double quotes so an id taken from the request
        # body cannot close the string literal and widen the delete expression.
        return '"' + value.replace("\\", "\\\\").replace('"', '\\"') + '"'

    try:
        if not knowledge_ids:
            raise HTTPException(status_code=400, detail="knowledge_ids cannot be empty")

        id_literals = ", ".join(_quoted(kid) for kid in knowledge_ids)
        expr = f'id in [{id_literals}]'

        milvus_store.collection.delete(expr=expr)
        print(f"[Batch Delete] 已删除 {len(knowledge_ids)} 条知识")
        return {"status": "ok", "deleted_count": len(knowledge_ids)}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Batch Delete] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/knowledge/batch_update")
async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
    """Apply a batch of effectiveness feedback to stored knowledge entries.

    Each item of ``batch.feedback_list`` is read with ``.get`` for the keys
    ``knowledge_id``, ``is_effective`` and ``feedback``:

    * effective **with** feedback text: the content is rewritten by
      ``_evolve_knowledge_with_llm`` and ``eval["helpful"]`` is incremented;
    * effective without feedback: ``eval["helpful"]`` is incremented;
    * otherwise: ``eval["harmful"]`` is incremented.

    Items without a ``knowledge_id`` or whose entry cannot be found are skipped.

    Returns:
        ``{"status": "ok", "updated": n}`` where ``n`` counts the processed
        feedback items.

    Raises:
        HTTPException: 500 with the error text when anything fails.
    """
    if not batch.feedback_list:
        return {"status": "ok", "updated": 0}

    try:
        # Fetch each entry once per request. Feedback items that target the
        # same ID then share one ``eval`` dict, so their counter increments
        # accumulate instead of overwriting each other.
        records = {}
        evolution_tasks = []  # [(knowledge_id, old_content, feedback, eval_data)]
        simple_updates = []   # [(knowledge_id, is_effective, eval_data)]

        for item in batch.feedback_list:
            knowledge_id = item.get("knowledge_id")
            is_effective = item.get("is_effective")
            feedback = item.get("feedback", "")

            if not knowledge_id:
                continue

            if knowledge_id not in records:
                records[knowledge_id] = milvus_store.get_by_id(knowledge_id)
            existing = records[knowledge_id]
            if not existing:
                continue

            eval_data = existing.get("eval")
            if eval_data is None:
                # Store the fresh dict back so later items for this ID reuse it.
                eval_data = {}
                existing["eval"] = eval_data

            if is_effective and feedback:
                evolution_tasks.append((knowledge_id, existing["content"], feedback, eval_data))
            else:
                simple_updates.append((knowledge_id, is_effective, eval_data))

        # Counter-only updates.
        for knowledge_id, is_effective, eval_data in simple_updates:
            counter = "helpful" if is_effective else "harmful"
            eval_data[counter] = eval_data.get(counter, 0) + 1
            milvus_store.update(knowledge_id, {"eval": eval_data})

        # Run all LLM rewrites concurrently.
        if evolution_tasks:
            print(f"🧬 并发处理 {len(evolution_tasks)} 条知识进化...")
            evolved_results = await asyncio.gather(
                *[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _ in evolution_tasks]
            )

            for (knowledge_id, _, _, eval_data), evolved_content in zip(evolution_tasks, evolved_results):
                eval_data["helpful"] = eval_data.get("helpful", 0) + 1
                # The embedding is derived from ``task`` only and ``task`` is
                # not modified here, so the stored vector stays valid and is
                # not regenerated.
                milvus_store.update(knowledge_id, {
                    "content": evolved_content,
                    "eval": eval_data,
                })

        return {"status": "ok", "updated": len(simple_updates) + len(evolution_tasks)}
    except Exception as e:
        print(f"[Batch Update] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
def _parse_slim_int(raw, default):
    """Parse an integer field emitted by the LLM, returning ``default`` if it is not a number."""
    try:
        return int(raw.strip())
    except ValueError:
        return default


def _parse_slim_output(content):
    """Parse the LLM's slimming output into ``(entries, report_line)``.

    Entries are separated by ``===``. Inside an entry, the ``ID:``, ``TYPES:``,
    ``HELPFUL:``, ``HARMFUL:``, ``SCORE:``, ``TASK:`` and ``CONTENT:`` prefixes
    start a field; unprefixed lines continue the preceding TASK or CONTENT.
    Entries lacking an ID or any content are discarded.
    """
    report_line = ""
    entries = []

    for block in content.split("==="):
        block = block.strip()
        if not block:
            continue
        if block.startswith("REPORT:"):
            report_line = block
            continue

        kid, types, helpful, harmful, score, task = None, [], 0, 0, 3, ""
        content_lines = []
        current_field = None

        for line in block.split("\n"):
            if line.startswith("ID:"):
                kid = line[3:].strip()
                current_field = None
            elif line.startswith("TYPES:"):
                types = [t.strip() for t in line[6:].split(",") if t.strip()]
                current_field = None
            elif line.startswith("HELPFUL:"):
                helpful = _parse_slim_int(line[8:], 0)
                current_field = None
            elif line.startswith("HARMFUL:"):
                harmful = _parse_slim_int(line[8:], 0)
                current_field = None
            elif line.startswith("SCORE:"):
                score = _parse_slim_int(line[6:], 3)
                current_field = None
            elif line.startswith("TASK:"):
                task = line[5:].strip()
                current_field = "task"
            elif line.startswith("CONTENT:"):
                content_lines.append(line[8:].strip())
                current_field = "content"
            elif line.startswith("REPORT:"):
                # The model sometimes omits the final "===" before the report;
                # keep the report out of the last entry's content.
                report_line = line.strip()
                current_field = None
            elif current_field == "task":
                task += "\n" + line
            elif current_field == "content":
                content_lines.append(line)

        if kid and content_lines:
            entries.append({
                "id": kid,
                "types": types if types else ["strategy"],
                "helpful": helpful,
                "harmful": harmful,
                "score": score,
                "task": task.strip(),
                "content": "\n".join(content_lines).strip(),
            })

    return entries, report_line


def _restore_truncated_content(new_content, original_content, preview_chars=200):
    """Return the stored content when the LLM merely echoed its truncated preview.

    The prompt only shows the first ``preview_chars`` characters of each entry.
    If the returned content (ignoring a trailing ellipsis) is a strict prefix of
    the stored content, the model did not rewrite it, so the full original text
    is kept instead of the shortened echo.
    """
    if len(original_content) <= preview_chars:
        return new_content
    core = new_content.strip().rstrip(".…").rstrip()
    stored = original_content.lstrip()
    if core and len(core) < len(stored) and stored.startswith(core):
        return original_content
    return new_content


@app.post("/api/knowledge/slim")
async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
    """Slim the knowledge base by letting an LLM merge semantically similar entries.

    Loads up to 10000 entries, asks ``model`` to merge duplicates, parses the
    answer, re-embeds every resulting entry from its ``task`` text, then drops
    and rebuilds the collection with the merged set.

    Args:
        model: Model identifier passed to ``openrouter_llm_call``.

    Returns:
        ``{"status": "ok", "before": n, "after": m, "report": str}``, or a
        ``status``/``message`` dict when fewer than two entries exist.

    Raises:
        HTTPException: 500 when the LLM output is empty or unparsable, when the
            embedding count does not match, or on any other failure.

    NOTE(review): the rebuilt rows receive fixed ``tags``/``scopes``/``owner``/
    ``source`` values and a fresh ``created_at``, and rows beyond the 10000-row
    query limit are not carried over — confirm this is intended.
    """
    try:
        # Load every entry and convert it to a serialisable form.
        all_knowledge = milvus_store.query('id != ""', limit=10000)
        all_knowledge = [serialize_milvus_result(item) for item in all_knowledge]

        if len(all_knowledge) < 2:
            return {"status": "ok", "message": f"知识库仅有 {len(all_knowledge)} 条,无需瘦身"}

        # Render the entries for the prompt (content is shortened to a preview).
        entries_text = ""
        for item in all_knowledge:
            eval_data = item.get("eval", {})
            types = item.get("types", [])
            entries_text += f"[ID: {item['id']}] [Types: {','.join(types)}] "
            entries_text += f"[Helpful: {eval_data.get('helpful', 0)}, Harmful: {eval_data.get('harmful', 0)}] [Score: {eval_data.get('score', 3)}]\n"
            entries_text += f"Task: {item['task']}\n"
            entries_text += f"Content: {item['content'][:200]}...\n\n"

        prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:
【任务】:
1. 识别语义高度相似或重复的知识,将它们合并为一条更精炼、更通用的知识。
2. 合并时保留 helpful 最高的那条的 ID(helpful 取各条之和)。
3. 对于独立的、无重复的知识,保持原样不动。
【当前知识库】:
{entries_text}
【输出格式要求】:
严格按以下格式输出每条知识,条目之间用 === 分隔:
ID: <保留的id>
TYPES: <逗号分隔的type列表>
HELPFUL: <合并后的helpful计数>
HARMFUL: <合并后的harmful计数>
SCORE: <评分>
TASK: <任务描述>
CONTENT: <合并后的知识内容>
===
最后输出合并报告:
REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
禁止输出任何开场白或解释。"""

        print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(all_knowledge)} 条知识...")
        response = await openrouter_llm_call(
            messages=[{"role": "user", "content": prompt}],
            model=model
        )
        content = response.get("content", "").strip()
        if not content:
            raise HTTPException(status_code=500, detail="LLM 返回为空")

        # Parse the model output.
        parsed_entries, report_line = _parse_slim_output(content)
        if not parsed_entries:
            raise HTTPException(status_code=500, detail="解析大模型输出失败")

        # Post-process: drop repeated IDs (the ID is the primary key) and undo
        # truncation for entries the model returned unchanged.
        originals = {item["id"]: item for item in all_knowledge}
        seen_ids = set()
        new_entries = []
        for entry in parsed_entries:
            if entry["id"] in seen_ids:
                print(f"[知识瘦身] 跳过重复 ID: {entry['id']}")
                continue
            seen_ids.add(entry["id"])
            original = originals.get(entry["id"])
            if original is not None:
                entry["content"] = _restore_truncated_content(
                    entry["content"], original.get("content") or ""
                )
            new_entries.append(entry)

        # Embed every entry (vectors are based on ``task`` only).
        print(f"[知识瘦身] 正在为 {len(new_entries)} 条知识生成向量...")
        texts = [e['task'] for e in new_entries]
        embeddings = await get_embeddings_batch(texts)

        # Verify BEFORE the destructive step: ``zip`` below would silently drop
        # entries if fewer vectors than entries came back.
        if len(embeddings) != len(new_entries):
            raise HTTPException(status_code=500, detail="向量数量与知识条目数量不一致,已中止瘦身")

        # Drop and rebuild the collection.
        now = int(time.time())
        milvus_store.drop_collection()
        milvus_store._init_collection()

        knowledge_list = []
        for e, embedding in zip(new_entries, embeddings):
            eval_data = {
                "score": e["score"],
                "helpful": e["helpful"],
                "harmful": e["harmful"],
                "confidence": 0.9,
                "helpful_history": [],
                "harmful_history": []
            }
            source = {
                "name": "slim",
                "category": "exp",
                "urls": [],
                "agent_id": "slim",
                "submitted_by": "system",
                "timestamp": datetime.now(timezone.utc).isoformat()
            }
            knowledge_list.append({
                "id": e["id"],
                "embedding": embedding,
                "message_id": "",
                "task": e["task"],
                "content": e["content"],
                "types": e["types"],
                "tags": {},
                "tag_keys": [],
                "scopes": ["org:cybertogether"],
                "owner": "agent:slim",
                "resource_ids": [],
                "source": source,
                "eval": eval_data,
                "created_at": now,
                "updated_at": now
            })

        milvus_store.insert_batch(knowledge_list)

        result_msg = f"瘦身完成:{len(all_knowledge)} → {len(new_entries)} 条知识"
        if report_line:
            result_msg += f"\n{report_line}"
        print(f"[知识瘦身] {result_msg}")
        return {"status": "ok", "before": len(all_knowledge), "after": len(new_entries), "report": report_line}
    except HTTPException:
        raise
    except Exception as e:
        print(f"[Slim Knowledge] 错误: {e}")
        raise HTTPException(status_code=500, detail=str(e))
@app.post("/api/extract")
async def extract_knowledge_from_messages(extract_req: MessageExtractIn):
    """Extract knowledge entries from a message history using an LLM.

    The messages are rendered into a prompt, the model is asked for a JSON
    array of ``{task, content, types, score}`` objects, and every well-formed
    item is embedded (from ``task`` only) and inserted into the store.

    Args:
        extract_req: Request carrying ``messages``, ``submitted_by``,
            ``agent_id`` and ``session_key``.

    Returns:
        ``{"status": "ok", "extracted_count": n, "knowledge_ids": [...]}`` on
        success. Failures after validation are reported in the body as
        ``{"status": "error", "error": ..., "extracted_count": 0}`` rather than
        as an HTTP error.

    Raises:
        HTTPException: 400 when ``submitted_by`` is missing.
    """
    if not extract_req.submitted_by:
        raise HTTPException(status_code=400, detail="submitted_by is required")

    messages = extract_req.messages
    if not messages or len(messages) == 0:
        return {"status": "ok", "extracted_count": 0, "knowledge_ids": []}

    # Render the message history as plain text.
    messages_text = ""
    for msg in messages:
        role = msg.get("role", "unknown")
        message_body = msg.get("content", "")
        messages_text += f"[{role}]: {message_body}\n\n"

    # Prompt asking the LLM to extract knowledge.
    prompt = f"""你是一个知识提取专家。请从以下 Agent 对话历史中提取有价值的知识。
【对话历史】:
{messages_text}
【提取要求】:
1. 识别对话中的关键知识点(工具使用经验、问题解决方案、最佳实践、踩坑经验等)
2. 每条知识必须包含:
- task: 任务场景描述(在什么情况下,要完成什么目标)
- content: 核心知识内容(具体可操作的方法、注意事项)
- types: 知识类型(从 strategy/tool/user_profile/usecase/definition/plan 中选择)
- score: 评分 1-5(根据知识的价值和可操作性)
3. 只提取有实际价值的知识,不要提取泛泛而谈的内容,一次就成功或比较简单的经验就不要记录了。
4. 如果没有值得提取的知识,返回空列表
【输出格式】:
严格按以下 JSON 格式输出,每条知识之间用逗号分隔:
[
{{
"task": "任务场景描述",
"content": "核心知识内容",
"types": ["strategy"],
"score": 4
}},
{{
"task": "另一个任务场景",
"content": "另一个知识内容",
"types": ["tool"],
"score": 5
}}
]
如果没有知识,输出: []
**注意**:只记录经过多次尝试、或经过用户指导才成功的知识,一次就成功或比较简单的经验就不要记录了。
禁止输出任何解释或额外文本,只输出 JSON 数组。"""

    # Bound before ``try`` so the JSON error handler can always log it.
    content = ""
    try:
        print(f"\n[Extract] 正在从 {len(messages)} 条消息中提取知识...")
        response = await openrouter_llm_call(
            messages=[{"role": "user", "content": prompt}],
            model="google/gemini-2.5-flash-lite"
        )
        content = response.get("content", "").strip()

        # Strip an optional markdown code fence around the JSON.
        if content.startswith("```json"):
            content = content[7:]
        if content.startswith("```"):
            content = content[3:]
        if content.endswith("```"):
            content = content[:-3]
        content = content.strip()

        extracted_knowledge = json.loads(content)
        if not isinstance(extracted_knowledge, list):
            raise ValueError("LLM output is not a list")

        # Validate and normalise items BEFORE embedding, so no vectors are
        # requested for entries that will be discarded and a single malformed
        # item cannot fail the whole batch.
        valid_items = []
        for item in extracted_knowledge:
            if not isinstance(item, dict):
                continue
            task = item.get("task", "")
            knowledge_content = item.get("content", "")
            if not isinstance(task, str) or not isinstance(knowledge_content, str):
                continue
            if not task or not knowledge_content:
                continue

            types = item.get("types", ["strategy"])
            if isinstance(types, str):
                types = [types]
            elif isinstance(types, list):
                types = [t for t in types if isinstance(t, str) and t]
            else:
                types = []
            if not types:
                types = ["strategy"]

            # The prompt asks for a 1-5 score; fall back to 3 when unusable.
            try:
                score = min(5, max(1, int(item.get("score", 3))))
            except (TypeError, ValueError):
                score = 3

            valid_items.append((task, knowledge_content, types, score))

        if not valid_items:
            return {"status": "ok", "extracted_count": 0, "knowledge_ids": []}

        # Batch-embed (vectors are based on ``task`` only).
        texts = [task for task, _, _, _ in valid_items]
        embeddings = await get_embeddings_batch(texts)
        if len(embeddings) != len(valid_items):
            raise ValueError("embedding count does not match knowledge count")

        # Build the rows to store.
        knowledge_ids = []
        now = int(time.time())
        knowledge_list = []
        for (task, knowledge_content, types, score), embedding in zip(valid_items, embeddings):
            # Generate the ID.
            timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
            random_suffix = uuid.uuid4().hex[:4]
            knowledge_id = f"knowledge-{timestamp}-{random_suffix}"

            source = {
                "name": "message_extraction",
                "category": "exp",
                "urls": [],
                "agent_id": extract_req.agent_id,
                "submitted_by": extract_req.submitted_by,
                "timestamp": datetime.now(timezone.utc).isoformat(),
                "session_key": extract_req.session_key
            }
            eval_data = {
                "score": score,
                "helpful": 1,
                "harmful": 0,
                "confidence": 0.7,
                "helpful_history": [],
                "harmful_history": []
            }
            knowledge_list.append({
                "id": knowledge_id,
                "embedding": embedding,
                "message_id": "",
                "task": task,
                "content": knowledge_content,
                "types": types,
                "tags": {},
                "tag_keys": [],
                "scopes": ["org:cybertogether"],
                "owner": extract_req.submitted_by,
                "resource_ids": [],
                "source": source,
                "eval": eval_data,
                "created_at": now,
                "updated_at": now,
            })
            knowledge_ids.append(knowledge_id)

        # Bulk insert.
        if knowledge_list:
            milvus_store.insert_batch(knowledge_list)

        print(f"[Extract] 成功提取并保存 {len(knowledge_ids)} 条知识")
        return {
            "status": "ok",
            "extracted_count": len(knowledge_ids),
            "knowledge_ids": knowledge_ids
        }
    except json.JSONDecodeError as e:
        print(f"[Extract] JSON 解析失败: {e}")
        print(f"[Extract] LLM 输出: {content[:500]}")
        return {"status": "error", "error": "Failed to parse LLM output", "extracted_count": 0}
    except Exception as e:
        print(f"[Extract] 提取失败: {e}")
        return {"status": "error", "error": str(e), "extracted_count": 0}
@app.get("/", response_class=HTMLResponse)
def frontend():
    """Serve the single-page KnowHub admin UI.

    Returns:
        The complete HTML document (markup plus inline JavaScript) as a string.

    The page lists, searches, filters, creates, edits and batch-deletes
    knowledge entries through the ``/api/knowledge`` endpoints. Every
    server-provided value is HTML-escaped before being written into the DOM,
    and entry IDs are passed to handlers through ``data-id`` attributes rather
    than being spliced into inline JavaScript.
    """
    return """<!DOCTYPE html>
<html lang="zh-CN">
<head>
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>KnowHub 管理</title>
    <script src="https://cdn.tailwindcss.com"></script>
</head>
<body class="bg-gray-50">
    <div class="container mx-auto px-4 py-8 max-w-7xl">
        <div class="flex justify-between items-center mb-8">
            <h1 class="text-3xl font-bold text-gray-800">KnowHub 全局知识库</h1>
            <div class="flex gap-3">
                <button onclick="toggleSelectAll()" id="selectAllBtn" class="bg-gray-500 hover:bg-gray-600 text-white px-4 py-2 rounded-lg">
                    全选
                </button>
                <button onclick="batchDelete()" id="batchDeleteBtn" class="bg-red-600 hover:bg-red-700 text-white px-4 py-2 rounded-lg disabled:opacity-50 disabled:cursor-not-allowed" disabled>
                    删除选中 (<span id="selectedCount">0</span>)
                </button>
                <button onclick="openAddModal()" class="bg-blue-600 hover:bg-blue-700 text-white px-6 py-2 rounded-lg">
                    + 新增知识
                </button>
            </div>
        </div>
        <!-- 搜索栏 -->
        <div class="bg-white rounded-lg shadow p-6 mb-6">
            <div class="flex gap-4">
                <input type="text" id="searchInput" placeholder="输入任务描述进行语义搜索..."
                       class="flex-1 border rounded px-4 py-2 focus:outline-none focus:ring-2 focus:ring-blue-500"
                       onkeypress="if(event.key==='Enter') performSearch()">
                <button onclick="performSearch()" class="bg-blue-600 hover:bg-blue-700 text-white px-6 py-2 rounded">
                    搜索
                </button>
                <button onclick="clearSearch()" class="bg-gray-500 hover:bg-gray-600 text-white px-6 py-2 rounded">
                    清除
                </button>
            </div>
            <div id="searchStatus" class="mt-2 text-sm text-gray-600 hidden"></div>
        </div>
        <!-- 筛选栏 -->
        <div class="bg-white rounded-lg shadow p-6 mb-6">
            <div class="grid grid-cols-1 md:grid-cols-4 gap-4">
                <div>
                    <label class="block text-sm font-medium text-gray-700 mb-2">类型 (Types)</label>
                    <div class="space-y-2">
                        <label class="flex items-center"><input type="checkbox" value="strategy" class="mr-2 type-filter"> Strategy</label>
                        <label class="flex items-center"><input type="checkbox" value="tool" class="mr-2 type-filter"> Tool</label>
                        <label class="flex items-center"><input type="checkbox" value="user_profile" class="mr-2 type-filter"> User Profile</label>
                        <label class="flex items-center"><input type="checkbox" value="usecase" class="mr-2 type-filter"> Usecase</label>
                        <label class="flex items-center"><input type="checkbox" value="definition" class="mr-2 type-filter"> Definition</label>
                        <label class="flex items-center"><input type="checkbox" value="plan" class="mr-2 type-filter"> Plan</label>
                    </div>
                </div>
                <div>
                    <label class="block text-sm font-medium text-gray-700 mb-2">Tags</label>
                    <div id="tagsFilterContainer" class="space-y-2 max-h-40 overflow-y-auto">
                        <p class="text-sm text-gray-500">加载中...</p>
                    </div>
                </div>
                <div>
                    <label class="block text-sm font-medium text-gray-700 mb-2">Owner</label>
                    <input type="text" id="ownerFilter" placeholder="输入 owner" class="w-full border rounded px-3 py-2">
                </div>
                <div>
                    <label class="block text-sm font-medium text-gray-700 mb-2">Scopes</label>
                    <input type="text" id="scopesFilter" placeholder="输入 scope" class="w-full border rounded px-3 py-2">
                </div>
            </div>
            <button onclick="applyFilters()" class="mt-4 bg-gray-600 hover:bg-gray-700 text-white px-4 py-2 rounded">
                应用筛选
            </button>
        </div>
        <!-- 知识列表 -->
        <div id="knowledgeList" class="space-y-4"></div>
        <!-- 分页控件 -->
        <div id="pagination" class="flex justify-center items-center gap-4 mt-6 hidden">
            <button onclick="goToPage(currentPage - 1)" id="prevBtn" class="bg-gray-600 hover:bg-gray-700 text-white px-4 py-2 rounded disabled:opacity-50 disabled:cursor-not-allowed">
                上一页
            </button>
            <span id="pageInfo" class="text-gray-700"></span>
            <button onclick="goToPage(currentPage + 1)" id="nextBtn" class="bg-gray-600 hover:bg-gray-700 text-white px-4 py-2 rounded disabled:opacity-50 disabled:cursor-not-allowed">
                下一页
            </button>
        </div>
    </div>
    <!-- 新增/编辑 Modal -->
    <div id="modal" class="hidden fixed inset-0 bg-black bg-opacity-50 flex items-center justify-center p-4">
        <div class="bg-white rounded-lg max-w-2xl w-full max-h-[90vh] overflow-y-auto p-6">
            <h2 id="modalTitle" class="text-2xl font-bold mb-4">新增知识</h2>
            <form id="knowledgeForm" class="space-y-4">
                <input type="hidden" id="editId">
                <div>
                    <label class="block text-sm font-medium mb-1">Task *</label>
                    <input type="text" id="taskInput" required class="w-full border rounded px-3 py-2">
                </div>
                <div>
                    <label class="block text-sm font-medium mb-1">Content *</label>
                    <textarea id="contentInput" required rows="6" class="w-full border rounded px-3 py-2"></textarea>
                </div>
                <div>
                    <label class="block text-sm font-medium mb-1">Types (多选)</label>
                    <div class="space-y-1">
                        <label class="flex items-center"><input type="checkbox" value="strategy" class="mr-2 type-checkbox"> Strategy</label>
                        <label class="flex items-center"><input type="checkbox" value="tool" class="mr-2 type-checkbox"> Tool</label>
                        <label class="flex items-center"><input type="checkbox" value="user_profile" class="mr-2 type-checkbox"> User Profile</label>
                        <label class="flex items-center"><input type="checkbox" value="usecase" class="mr-2 type-checkbox"> Usecase</label>
                        <label class="flex items-center"><input type="checkbox" value="definition" class="mr-2 type-checkbox"> Definition</label>
                        <label class="flex items-center"><input type="checkbox" value="plan" class="mr-2 type-checkbox"> Plan</label>
                    </div>
                </div>
                <div>
                    <label class="block text-sm font-medium mb-1">Tags (JSON)</label>
                    <textarea id="tagsInput" rows="2" placeholder='{"key": "value"}' class="w-full border rounded px-3 py-2"></textarea>
                </div>
                <div>
                    <label class="block text-sm font-medium mb-1">Scopes (逗号分隔)</label>
                    <input type="text" id="scopesInput" placeholder="org:cybertogether" class="w-full border rounded px-3 py-2">
                </div>
                <div>
                    <label class="block text-sm font-medium mb-1">Owner</label>
                    <input type="text" id="ownerInput" class="w-full border rounded px-3 py-2">
                </div>
                <div class="flex gap-2 pt-4">
                    <button type="submit" class="bg-blue-600 hover:bg-blue-700 text-white px-6 py-2 rounded">保存</button>
                    <button type="button" onclick="closeModal()" class="bg-gray-300 hover:bg-gray-400 px-6 py-2 rounded">取消</button>
                </div>
            </form>
        </div>
    </div>
    <script>
        let allKnowledge = [];
        let availableTags = [];
        let currentPage = 1;
        let pageSize = 200; // 每页显示200条
        let totalPages = 1;
        let totalCount = 0;
        let isSearchMode = false; // 标记是否在搜索模式
        let selectedIds = new Set(); // 选中的知识ID集合

        async function loadTags() {
            try {
                const res = await fetch('/api/knowledge/meta/tags');
                if (!res.ok) {
                    throw new Error(`HTTP ${res.status}`);
                }
                const data = await res.json();
                availableTags = Array.isArray(data.tags) ? data.tags : [];
            } catch (error) {
                // A failed tag request must not leave the filter stuck on "loading".
                console.error('加载 tags 失败:', error);
                availableTags = [];
            }
            renderTagsFilter();
        }

        function renderTagsFilter() {
            const container = document.getElementById('tagsFilterContainer');
            if (availableTags.length === 0) {
                container.innerHTML = '<p class="text-sm text-gray-500">暂无 tags</p>';
                return;
            }
            container.innerHTML = availableTags.map(tag =>
                `<label class="flex items-center"><input type="checkbox" value="${escapeHtml(tag)}" class="mr-2 tag-filter"> ${escapeHtml(tag)}</label>`
            ).join('');
        }

        async function loadKnowledge(page = 1) {
            const params = new URLSearchParams();
            params.append('page', page);
            params.append('page_size', pageSize);
            const selectedTypes = Array.from(document.querySelectorAll('.type-filter:checked')).map(el => el.value);
            if (selectedTypes.length > 0) {
                params.append('types', selectedTypes.join(','));
            }
            const selectedTags = Array.from(document.querySelectorAll('.tag-filter:checked')).map(el => el.value);
            if (selectedTags.length > 0) {
                params.append('tags', selectedTags.join(','));
            }
            const ownerFilter = document.getElementById('ownerFilter').value.trim();
            if (ownerFilter) {
                params.append('owner', ownerFilter);
            }
            const scopesFilter = document.getElementById('scopesFilter').value.trim();
            if (scopesFilter) {
                params.append('scopes', scopesFilter);
            }
            try {
                const res = await fetch(`/api/knowledge?${params.toString()}`);
                if (!res.ok) {
                    console.error('加载失败:', res.status, res.statusText);
                    document.getElementById('knowledgeList').innerHTML = '<p class="text-red-500 text-center py-8">加载失败,请刷新页面重试</p>';
                    return;
                }
                const data = await res.json();
                allKnowledge = data.results || [];
                currentPage = data.pagination.page;
                totalPages = data.pagination.total_pages;
                totalCount = data.pagination.total;
                renderKnowledge(allKnowledge);
                updatePagination();
            } catch (error) {
                console.error('加载错误:', error);
                // error.message is not trusted markup; escape it before inserting.
                document.getElementById('knowledgeList').innerHTML = '<p class="text-red-500 text-center py-8">加载错误: ' + escapeHtml(error.message) + '</p>';
            }
        }

        function applyFilters() {
            currentPage = 1; // 重置到第一页
            loadKnowledge(currentPage);
        }

        function goToPage(page) {
            if (page < 1 || page > totalPages) return;
            loadKnowledge(page);
        }

        function updatePagination() {
            const paginationDiv = document.getElementById('pagination');
            const pageInfo = document.getElementById('pageInfo');
            const prevBtn = document.getElementById('prevBtn');
            const nextBtn = document.getElementById('nextBtn');
            if (totalPages <= 1) {
                paginationDiv.classList.add('hidden');
            } else {
                paginationDiv.classList.remove('hidden');
                pageInfo.textContent = `第 ${currentPage} / ${totalPages} 页 (共 ${totalCount} 条)`;
                prevBtn.disabled = currentPage === 1;
                nextBtn.disabled = currentPage === totalPages;
            }
        }

        async function performSearch() {
            const query = document.getElementById('searchInput').value.trim();
            if (!query) {
                alert('请输入搜索内容');
                return;
            }
            isSearchMode = true;
            const statusDiv = document.getElementById('searchStatus');
            statusDiv.textContent = '搜索中...';
            // Clear the error colour left behind by a previously failed search.
            statusDiv.classList.remove('text-red-500');
            statusDiv.classList.remove('hidden');
            try {
                const params = new URLSearchParams();
                params.append('q', query);
                params.append('top_k', '20');
                params.append('min_score', '1'); // 搜索时降低最低分数要求
                // 应用筛选条件
                const selectedTypes = Array.from(document.querySelectorAll('.type-filter:checked')).map(el => el.value);
                if (selectedTypes.length > 0) {
                    params.append('types', selectedTypes.join(','));
                }
                const ownerFilter = document.getElementById('ownerFilter').value.trim();
                if (ownerFilter) {
                    params.append('owner', ownerFilter);
                }
                const res = await fetch(`/api/knowledge/search?${params.toString()}`);
                if (!res.ok) {
                    throw new Error(`搜索失败: ${res.status}`);
                }
                const data = await res.json();
                allKnowledge = data.results || [];
                statusDiv.textContent = `找到 ${allKnowledge.length} 条相关知识${data.reranked ? ' (已智能排序)' : ''}`;
                renderKnowledge(allKnowledge);
                // 搜索模式下隐藏分页
                document.getElementById('pagination').classList.add('hidden');
            } catch (error) {
                console.error('搜索错误:', error);
                statusDiv.textContent = '搜索失败: ' + error.message;
                statusDiv.classList.add('text-red-500');
            }
        }

        function clearSearch() {
            document.getElementById('searchInput').value = '';
            document.getElementById('searchStatus').classList.add('hidden');
            document.getElementById('searchStatus').classList.remove('text-red-500');
            isSearchMode = false;
            currentPage = 1;
            loadKnowledge(currentPage);
        }

        // created_at is written by the backend as epoch seconds, while Date
        // expects milliseconds; values already in milliseconds pass unchanged.
        function formatDate(value) {
            if (value === null || value === undefined || value === '') return 'N/A';
            const numeric = Number(value);
            const date = Number.isNaN(numeric)
                ? new Date(value)
                : new Date(numeric < 1e12 ? numeric * 1000 : numeric);
            return Number.isNaN(date.getTime()) ? 'N/A' : date.toLocaleDateString();
        }

        function renderKnowledge(list) {
            const container = document.getElementById('knowledgeList');
            if (list.length === 0) {
                container.innerHTML = '<p class="text-gray-500 text-center py-8">暂无知识</p>';
                return;
            }
            container.innerHTML = list.map(k => {
                // 确保types是数组
                let types = [];
                if (Array.isArray(k.types)) {
                    types = k.types;
                } else if (typeof k.types === 'string') {
                    // 如果是JSON字符串(以[开头),尝试解析
                    if (k.types.startsWith('[')) {
                        try {
                            types = JSON.parse(k.types);
                        } catch (e) {
                            console.error('解析types失败:', k.types, e);
                            types = [k.types];
                        }
                    } else {
                        // 如果是普通字符串,包装成数组
                        types = [k.types];
                    }
                }
                const eval_data = k.eval || {};
                const isChecked = selectedIds.has(k.id);
                const safeId = escapeHtml(k.id);
                const content = k.content || '';
                // Handlers read the ID from data-id, so no server value is ever
                // spliced into inline JavaScript.
                return `
                    <div class="bg-white rounded-lg shadow p-6 hover:shadow-lg transition relative">
                        <div class="absolute top-4 left-4">
                            <input type="checkbox" class="knowledge-checkbox w-5 h-5 cursor-pointer"
                                   data-id="${safeId}" ${isChecked ? 'checked' : ''}
                                   onclick="event.stopPropagation(); toggleSelect(this.dataset.id)">
                        </div>
                        <div class="ml-10 cursor-pointer" data-id="${safeId}" onclick="openEditModal(this.dataset.id)">
                            <div class="flex justify-between items-start mb-2">
                                <div class="flex gap-2 flex-wrap">
                                    ${types.map(t => `<span class="bg-blue-100 text-blue-800 text-xs px-2 py-1 rounded">${escapeHtml(t)}</span>`).join('')}
                                </div>
                                <span class="text-sm text-gray-500">${escapeHtml(eval_data.score || 3)}/5</span>
                            </div>
                            <h3 class="text-lg font-semibold text-gray-800 mb-2">${escapeHtml(k.task)}</h3>
                            <p class="text-sm text-gray-600 mb-2">${escapeHtml(content.substring(0, 150))}${content.length > 150 ? '...' : ''}</p>
                            <div class="flex justify-between text-xs text-gray-500">
                                <span>Owner: ${escapeHtml(k.owner || 'N/A')}</span>
                                <span>${escapeHtml(formatDate(k.created_at))}</span>
                            </div>
                        </div>
                    </div>
                `;
            }).join('');
        }

        function toggleSelect(id) {
            if (selectedIds.has(id)) {
                selectedIds.delete(id);
            } else {
                selectedIds.add(id);
            }
            updateBatchDeleteButton();
        }

        function toggleSelectAll() {
            if (selectedIds.size === allKnowledge.length) {
                // 全部取消选中
                selectedIds.clear();
            } else {
                // 全部选中
                selectedIds.clear();
                allKnowledge.forEach(k => selectedIds.add(k.id));
            }
            renderKnowledge(allKnowledge);
            updateBatchDeleteButton();
        }

        function updateBatchDeleteButton() {
            const count = selectedIds.size;
            document.getElementById('selectedCount').textContent = count;
            document.getElementById('batchDeleteBtn').disabled = count === 0;
            document.getElementById('selectAllBtn').textContent =
                selectedIds.size === allKnowledge.length ? '取消全选' : '全选';
        }

        async function batchDelete() {
            if (selectedIds.size === 0) return;
            if (!confirm(`确定要删除选中的 ${selectedIds.size} 条知识吗?此操作不可恢复!`)) {
                return;
            }
            try {
                const ids = Array.from(selectedIds);
                const res = await fetch('/api/knowledge/batch_delete', {
                    method: 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify(ids)
                });
                if (!res.ok) {
                    throw new Error(`删除失败: ${res.status}`);
                }
                const data = await res.json();
                alert(`成功删除 ${data.deleted_count} 条知识`);
                // 清空选择并重新加载
                selectedIds.clear();
                updateBatchDeleteButton();
                if (isSearchMode) {
                    clearSearch();
                } else {
                    loadKnowledge(currentPage);
                }
            } catch (error) {
                console.error('批量删除错误:', error);
                alert('删除失败: ' + error.message);
            }
        }

        function openAddModal() {
            document.getElementById('modalTitle').textContent = '新增知识';
            document.getElementById('knowledgeForm').reset();
            document.getElementById('editId').value = '';
            document.querySelectorAll('.type-checkbox').forEach(el => el.checked = false);
            document.getElementById('modal').classList.remove('hidden');
        }

        async function openEditModal(id) {
            const k = allKnowledge.find(item => item.id === id);
            if (!k) return;
            document.getElementById('modalTitle').textContent = '编辑知识';
            document.getElementById('editId').value = k.id;
            document.getElementById('taskInput').value = k.task || '';
            document.getElementById('contentInput').value = k.content || '';
            document.getElementById('tagsInput').value = JSON.stringify(k.tags || {});
            // 防御性检查:确保 scopes 是数组
            const scopes = Array.isArray(k.scopes) ? k.scopes : [];
            document.getElementById('scopesInput').value = scopes.join(', ');
            document.getElementById('ownerInput').value = k.owner || '';
            // 防御性检查:确保 types 是数组
            const types = Array.isArray(k.types) ? k.types : [];
            document.querySelectorAll('.type-checkbox').forEach(el => {
                el.checked = types.includes(el.value);
            });
            document.getElementById('modal').classList.remove('hidden');
        }

        function closeModal() {
            document.getElementById('modal').classList.add('hidden');
        }

        document.getElementById('knowledgeForm').addEventListener('submit', async (e) => {
            e.preventDefault();
            const editId = document.getElementById('editId').value;
            const task = document.getElementById('taskInput').value;
            const content = document.getElementById('contentInput').value;
            const types = Array.from(document.querySelectorAll('.type-checkbox:checked')).map(el => el.value);
            const tagsText = document.getElementById('tagsInput').value.trim();
            const scopesText = document.getElementById('scopesInput').value.trim();
            const owner = document.getElementById('ownerInput').value.trim();
            let tags = {};
            if (tagsText) {
                try {
                    tags = JSON.parse(tagsText);
                } catch (e) {
                    alert('Tags JSON 格式错误');
                    return;
                }
            }
            const scopes = scopesText ? scopesText.split(',').map(s => s.trim()).filter(s => s) : ['org:cybertogether'];
            // 编辑 -> PATCH on the entry, 新增 -> POST on the collection.
            const url = editId ? `/api/knowledge/${encodeURIComponent(editId)}` : '/api/knowledge';
            try {
                const res = await fetch(url, {
                    method: editId ? 'PATCH' : 'POST',
                    headers: {'Content-Type': 'application/json'},
                    body: JSON.stringify({task, content, types, tags, scopes, owner})
                });
                if (!res.ok) {
                    alert(editId ? '更新失败' : '新增失败');
                    return;
                }
            } catch (error) {
                // Network failures would otherwise surface as an unhandled rejection.
                console.error('保存错误:', error);
                alert('保存失败: ' + error.message);
                return;
            }
            closeModal();
            await loadKnowledge();
        });

        // Escapes text for use in both element content and quoted attribute
        // values (quotes included, since values are placed inside attributes).
        function escapeHtml(text) {
            return String(text ?? '')
                .replace(/&/g, '&amp;')
                .replace(/</g, '&lt;')
                .replace(/>/g, '&gt;')
                .replace(/"/g, '&quot;')
                .replace(/'/g, '&#39;');
        }

        loadTags();
        loadKnowledge();
    </script>
</body>
</html>"""
if __name__ == "__main__":
    # Script entry point: serve the FastAPI app with uvicorn on every
    # interface, port 9999.
    import uvicorn

    server_options = {"host": "0.0.0.0", "port": 9999}
    uvicorn.run(app, **server_options)
|