server.py 99 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815
  1. """
  2. KnowHub Server
  3. Agent 工具使用经验的共享平台。
  4. FastAPI + Milvus Lite(知识)+ SQLite(资源),单文件部署。
  5. """
  6. import os
  7. import re
  8. import json
  9. import asyncio
  10. import base64
  11. import time
  12. import uuid
  13. from contextlib import asynccontextmanager
  14. from datetime import datetime, timezone
  15. from typing import Optional, List, Dict
  16. from pathlib import Path
  17. import httpx
  18. from cryptography.hazmat.primitives.ciphers.aead import AESGCM
  19. from fastapi import FastAPI, HTTPException, Query, Header, Body, BackgroundTasks, Request
  20. from fastapi.responses import HTMLResponse, FileResponse
  21. from fastapi.staticfiles import StaticFiles
  22. from pydantic import BaseModel, Field
  23. # 导入 LLM 调用(需要 agent 模块在 Python path 中)
  24. import sys
  25. sys.path.insert(0, str(Path(__file__).parent.parent))
  26. # 加载环境变量
  27. from dotenv import load_dotenv
  28. load_dotenv(Path(__file__).parent.parent / ".env")
  29. from agent.llm import create_openrouter_llm_call, create_qwen_llm_call
  30. from knowhub.kb_manage_prompts import (
  31. DEDUP_RELATION_PROMPT,
  32. TOOL_ANALYSIS_PROMPT,
  33. RERANK_PROMPT_TEMPLATE,
  34. KNOWLEDGE_EVOLVE_PROMPT_TEMPLATE,
  35. KNOWLEDGE_SLIM_PROMPT_TEMPLATE,
  36. MESSAGE_EXTRACT_PROMPT_TEMPLATE,
  37. )
  38. _dedup_llm = create_openrouter_llm_call(model="google/gemini-2.5-flash-lite")
  39. _tool_analysis_llm = create_qwen_llm_call(model="qwen3.5-plus")
  40. # 导入向量存储和 embedding
  41. from knowhub.knowhub_db.pg_store import PostgreSQLStore
  42. from knowhub.knowhub_db.pg_resource_store import PostgreSQLResourceStore
  43. from knowhub.knowhub_db.pg_tool_store import PostgreSQLToolStore
  44. from knowhub.knowhub_db.pg_capability_store import PostgreSQLCapabilityStore
  45. from knowhub.knowhub_db.pg_requirement_store import PostgreSQLRequirementStore
  46. from knowhub.knowhub_db.pg_strategy_store import PostgreSQLStrategyStore
  47. from knowhub.knowhub_db.version_context import set_version as _set_active_version, DEFAULT_VERSION as _DEFAULT_VERSION
  48. from knowhub.embeddings import get_embedding, get_embeddings_batch
  49. BRAND_NAME = os.getenv("BRAND_NAME", "KnowHub")
  50. BRAND_API_ENV = os.getenv("BRAND_API_ENV", "KNOWHUB_API")
  51. BRAND_DB = os.getenv("BRAND_DB", "knowhub.db")
  52. # 组织密钥配置(格式:org1:key1_base64,org2:key2_base64)
  53. ORG_KEYS_RAW = os.getenv("ORG_KEYS", "")
  54. ORG_KEYS = {}
  55. if ORG_KEYS_RAW:
  56. for pair in ORG_KEYS_RAW.split(","):
  57. if ":" in pair:
  58. org, key_b64 = pair.split(":", 1)
  59. ORG_KEYS[org.strip()] = key_b64.strip()
  60. DB_PATH = Path(__file__).parent / BRAND_DB
  61. # 全局 PostgreSQL 存储实例
  62. pg_store: Optional[PostgreSQLStore] = None
  63. pg_resource_store: Optional[PostgreSQLResourceStore] = None
  64. pg_tool_store: Optional[PostgreSQLToolStore] = None
  65. pg_capability_store: Optional[PostgreSQLCapabilityStore] = None
  66. pg_requirement_store: Optional[PostgreSQLRequirementStore] = None
  67. pg_strategy_store: Optional[PostgreSQLStrategyStore] = None
  68. # --- 加密/解密 ---
  69. def get_org_key(resource_id: str) -> Optional[bytes]:
  70. """从content_id提取组织前缀,返回对应密钥"""
  71. if "/" in resource_id:
  72. org = resource_id.split("/")[0]
  73. if org in ORG_KEYS:
  74. return base64.b64decode(ORG_KEYS[org])
  75. return None
  76. def encrypt_content(resource_id: str, plaintext: str) -> str:
  77. """加密内容,返回格式:encrypted:AES256-GCM:{base64_data}"""
  78. if not plaintext:
  79. return ""
  80. key = get_org_key(resource_id)
  81. if not key:
  82. # 没有配置密钥,明文存储(不推荐)
  83. return plaintext
  84. aesgcm = AESGCM(key)
  85. nonce = os.urandom(12) # 96-bit nonce
  86. ciphertext = aesgcm.encrypt(nonce, plaintext.encode("utf-8"), None)
  87. # 组合 nonce + ciphertext
  88. encrypted_data = nonce + ciphertext
  89. encoded = base64.b64encode(encrypted_data).decode("ascii")
  90. return f"encrypted:AES256-GCM:{encoded}"
  91. def decrypt_content(resource_id: str, encrypted_text: str, provided_key: Optional[str] = None) -> str:
  92. """解密内容,如果没有提供密钥或密钥错误,返回[ENCRYPTED]"""
  93. if not encrypted_text:
  94. return ""
  95. if not encrypted_text.startswith("encrypted:AES256-GCM:"):
  96. # 未加密的内容,直接返回
  97. return encrypted_text
  98. # 提取加密数据
  99. encoded = encrypted_text.split(":", 2)[2]
  100. encrypted_data = base64.b64decode(encoded)
  101. nonce = encrypted_data[:12]
  102. ciphertext = encrypted_data[12:]
  103. # 获取密钥
  104. key = None
  105. if provided_key:
  106. # 使用提供的密钥
  107. try:
  108. key = base64.b64decode(provided_key)
  109. except Exception:
  110. return "[ENCRYPTED]"
  111. else:
  112. # 从配置中获取
  113. key = get_org_key(resource_id)
  114. if not key:
  115. return "[ENCRYPTED]"
  116. try:
  117. aesgcm = AESGCM(key)
  118. plaintext = aesgcm.decrypt(nonce, ciphertext, None)
  119. return plaintext.decode("utf-8")
  120. except Exception:
  121. return "[ENCRYPTED]"
  122. def to_serializable(data):
  123. """通用序列化工具:把任意 Python 对象转换为 JSON 可序列化的原生类型"""
  124. # 基本类型直接返回
  125. if data is None or isinstance(data, (str, int, float, bool)):
  126. return data
  127. # 字典类型递归处理
  128. if isinstance(data, dict):
  129. return {k: to_serializable(v) for k, v in data.items()}
  130. # 列表/元组类型递归处理
  131. if isinstance(data, (list, tuple)):
  132. return [to_serializable(item) for item in data]
  133. # 尝试转换为字典(对于有 to_dict 方法的对象)
  134. if hasattr(data, 'to_dict') and callable(getattr(data, 'to_dict')):
  135. try:
  136. return to_serializable(data.to_dict())
  137. except:
  138. pass
  139. # 尝试转换为列表(对于可迭代对象,如 RepeatedScalarContainer)
  140. if hasattr(data, '__iter__') and not isinstance(data, (str, bytes, dict)):
  141. try:
  142. # 强制转换为列表并递归处理
  143. result = []
  144. for item in data:
  145. result.append(to_serializable(item))
  146. return result
  147. except:
  148. pass
  149. # 尝试获取对象的属性字典
  150. if hasattr(data, '__dict__'):
  151. try:
  152. return to_serializable(vars(data))
  153. except:
  154. pass
  155. # 最后的 fallback:对于无法处理的类型,返回 None 而不是字符串表示
  156. # 这样可以避免产生无法序列化的字符串
  157. return None
  158. # --- Models ---
class ResourceIn(BaseModel):
    """Request body for creating a resource."""
    id: str
    title: str = ""
    body: str
    secure_body: str = ""
    content_type: str = "text"  # one of: text | code | credential | cookie
    metadata: dict = {}
    sort_order: int = 0
    submitted_by: str = ""
class ResourcePatchIn(BaseModel):
    """Request body for PATCH /api/resource/{id}; None fields are left unchanged."""
    title: Optional[str] = None
    body: Optional[str] = None
    secure_body: Optional[str] = None
    content_type: Optional[str] = None
    metadata: Optional[dict] = None
class PostBatchRequest(BaseModel):
    """Request body carrying a batch of post ids."""
    post_ids: List[str] = Field(default_factory=list)
  177. # Knowledge Models
class KnowledgeIn(BaseModel):
    """Request body for submitting a knowledge entry."""
    task: str
    content: str
    types: list[str] = ["strategy"]
    tags: dict = {}
    scopes: list[str] = ["org:cybertogether"]
    owner: str = ""
    message_id: str = ""
    resource_ids: list[str] = []
    source: dict = {}  # {name, category, urls, agent_id, submitted_by, timestamp}
    eval: dict = {}  # {score, helpful, harmful, confidence}
    capability_ids: list[str] = []
    tool_ids: list[str] = []
class KnowledgeOut(BaseModel):
    """Response shape for a single knowledge entry."""
    id: str
    message_id: str
    types: list[str]
    task: str
    tags: dict
    scopes: list[str]
    owner: str
    content: str
    source: dict
    eval: dict
    created_at: str
    updated_at: str
class KnowledgeUpdateIn(BaseModel):
    """Request body for feedback-style knowledge updates (cases/score/evolve)."""
    add_helpful_case: Optional[dict] = None
    add_harmful_case: Optional[dict] = None
    update_score: Optional[int] = Field(default=None, ge=1, le=5)  # 1..5 rating
    evolve_feedback: Optional[str] = None
class KnowledgePatchIn(BaseModel):
    """Request body for PATCH /api/knowledge/{id} (direct field edits)."""
    task: Optional[str] = None
    content: Optional[str] = None
    types: Optional[list[str]] = None
    tags: Optional[dict] = None
    scopes: Optional[list[str]] = None
    owner: Optional[str] = None
    capability_ids: Optional[list[str]] = None
    tool_ids: Optional[list[str]] = None
class MessageExtractIn(BaseModel):
    """Request body for POST /api/extract (knowledge extraction from message history)."""
    messages: list[dict]  # [{role: str, content: str}, ...]
    agent_id: str = "unknown"
    submitted_by: str  # required; recorded as the knowledge owner
    session_key: str = ""
class KnowledgeBatchUpdateIn(BaseModel):
    """Request body carrying a batch of feedback items."""
    feedback_list: list[dict]
class KnowledgeVerifyIn(BaseModel):
    """Request body for verifying a single knowledge entry."""
    action: str  # "approve" | "reject"
    verified_by: str = "user"
class KnowledgeBatchVerifyIn(BaseModel):
    """Request body for batch verification of knowledge entries."""
    knowledge_ids: List[str]
    action: str  # "approve"
    verified_by: str
class KnowledgeSearchResponse(BaseModel):
    """Search response: matching knowledge entries plus their count."""
    results: list[dict]
    count: int
  237. # --- Tool Models ---
class ToolIn(BaseModel):
    """Request body for creating a tool."""
    id: str
    name: str = ""
    version: Optional[str] = None
    introduction: str = ""
    tutorial: str = ""
    input: dict | str = ""  # input schema (structured) or free-form description
    output: dict | str = ""
    status: str = "未接入"  # integration status label; default means "not integrated"
    capability_ids: list[str] = []
    knowledge_ids: list[str] = []
    provider_ids: list[str] = []
class ToolPatchIn(BaseModel):
    """Request body for patching a tool; None fields are left unchanged."""
    name: Optional[str] = None
    version: Optional[str] = None
    introduction: Optional[str] = None
    tutorial: Optional[str] = None
    input: Optional[dict | str] = None
    output: Optional[dict | str] = None
    status: Optional[str] = None
    capability_ids: Optional[list[str]] = None
    knowledge_ids: Optional[list[str]] = None
    provider_ids: Optional[list[str]] = None
  261. # --- Capability Models ---
class CapabilityIn(BaseModel):
    """Request body for creating a capability."""
    id: str
    name: str = ""
    criterion: str = ""
    description: str = ""
    requirement_ids: list[str] = []
    implements: dict = {}
    tool_ids: list[str] = []
    knowledge_ids: list[str] = []
class CapabilityPatchIn(BaseModel):
    """Request body for patching a capability; None fields are left unchanged."""
    name: Optional[str] = None
    criterion: Optional[str] = None
    description: Optional[str] = None
    requirement_ids: Optional[list[str]] = None
    implements: Optional[dict] = None
    tool_ids: Optional[list[str]] = None
    knowledge_ids: Optional[list[str]] = None
  279. # --- Requirement Models ---
class RequirementIn(BaseModel):
    """Request body for creating a requirement."""
    id: str
    description: str = ""
    capability_ids: list[str] = []
    knowledge_ids: list[str] = []
    source_nodes: list[dict] = []
    status: str = "未满足"  # fulfillment status label; default means "unmet"
    match_result: str = ""
class RequirementPatchIn(BaseModel):
    """Request body for patching a requirement; None fields are left unchanged."""
    description: Optional[str] = None
    capability_ids: Optional[list[str]] = None
    knowledge_ids: Optional[list[str]] = None
    source_nodes: Optional[list[dict]] = None
    status: Optional[str] = None
    match_result: Optional[str] = None
  295. # --- Strategy Models ---
class StrategyIn(BaseModel):
    """Request body for creating a strategy."""
    id: str
    name: str = ""
    description: str = ""
    body: str = ""
    status: str = "draft"
    version: str = "v0"
    capability_ids: list[str] = []
    knowledge_ids: list[str] = []
    resource_ids: list[str] = []
    requirement_ids: list[str] = []
class StrategyPatchIn(BaseModel):
    """Request body for patching a strategy; None fields are left unchanged."""
    name: Optional[str] = None
    description: Optional[str] = None
    body: Optional[str] = None
    status: Optional[str] = None
    version: Optional[str] = None
    capability_ids: Optional[list[str]] = None
    knowledge_ids: Optional[list[str]] = None
    resource_ids: Optional[list[str]] = None
    requirement_ids: Optional[list[str]] = None
class ResourceNode(BaseModel):
    """Lightweight resource reference (id + title) used for navigation links."""
    id: str
    title: str
class ResourceOut(BaseModel):
    """Response shape for a resource, including tree navigation references."""
    id: str
    title: str
    body: str
    secure_body: str = ""
    content_type: str = "text"
    metadata: dict = {}
    images: list[str] = []
    toc: Optional[ResourceNode] = None  # table-of-contents root, if any
    children: list[ResourceNode]
    prev: Optional[ResourceNode] = None  # previous sibling in reading order
    next: Optional[ResourceNode] = None  # next sibling in reading order
# --- Dedup: Globals ---
# Singleton background processor; expected to be created at application startup.
knowledge_processor: Optional["KnowledgeProcessor"] = None
  334. # --- Dedup: RelationCache ---
  335. class RelationCache:
  336. """关系缓存,存储在内存中"""
  337. def __init__(self):
  338. self._cache: Dict[str, List[str]] = {}
  339. def load(self) -> dict:
  340. return self._cache
  341. def save(self, cache: dict):
  342. self._cache = cache
  343. def add_relation(self, relation_type: str, knowledge_id: str):
  344. if relation_type not in self._cache:
  345. self._cache[relation_type] = []
  346. if knowledge_id not in self._cache[relation_type]:
  347. self._cache[relation_type].append(knowledge_id)
  348. # --- Dedup: KnowledgeProcessor ---
class KnowledgeProcessor:
    """Background pipeline that dedups new knowledge and links it to tools.

    Drives entries through the status machine visible below:
    pending -> processing -> dedup_passed -> analyzing -> approved/rejected,
    rolling back to the previous status on failure. Relies on the module-level
    ``pg_store`` / ``pg_tool_store`` singletons and the two LLM clients.
    """

    def __init__(self):
        # The lock serializes whole pipeline runs; the cache mirrors relations
        # written during this process's lifetime (in-memory only).
        self._lock = asyncio.Lock()
        self._relation_cache = RelationCache()

    async def process_pending(self):
        """Drain pending and dedup_passed knowledge until both queues are empty;
        the lock prevents concurrent runs (a second caller returns immediately)."""
        if self._lock.locked():
            return
        async with self._lock:
            # Phase 1: dedup the "pending" entries.
            while True:
                try:
                    pending = pg_store.query('status == "pending"', limit=50)
                except Exception as e:
                    print(f"[KnowledgeProcessor] 查询 pending 失败: {e}")
                    break
                if not pending:
                    break
                for knowledge in pending:
                    await self._process_one(knowledge)
            # Phase 2: tool association for "dedup_passed" entries.
            while True:
                try:
                    dedup_passed = pg_store.query('status == "dedup_passed"', limit=50)
                except Exception as e:
                    print(f"[KnowledgeProcessor] 查询 dedup_passed 失败: {e}")
                    break
                if not dedup_passed:
                    break
                for knowledge in dedup_passed:
                    await self._analyze_tool_relation(knowledge)

    async def _process_one(self, knowledge: dict):
        """Dedup a single pending entry against similar approved/checked ones."""
        kid = knowledge["id"]
        now = int(time.time())
        # Optimistic lock: pending -> processing (timestamps are epoch seconds).
        try:
            pg_store.update(kid, {"status": "processing", "updated_at": now})
        except Exception as e:
            print(f"[KnowledgeProcessor] 锁定 {kid} 失败: {e}")
            return
        try:
            # Vector recall of top-10 (restricted to approved/checked entries).
            embedding = knowledge.get("task_embedding") or knowledge.get("embedding")
            if not embedding:
                embedding = await get_embedding(knowledge["task"])
            candidates = pg_store.search(
                query_embedding=embedding,
                filters='(status == "approved" or status == "checked")',
                limit=10
            )
            candidates = [c for c in candidates if c["id"] != kid]
            # Keep only candidates with similarity >= 0.75; below that the
            # tasks differ too much semantically and are treated as "none".
            candidates = [c for c in candidates if c.get("score", 0) >= 0.75]
            if not candidates:
                pg_store.update(kid, {"status": "dedup_passed", "updated_at": now})
                return
            llm_result = await self._llm_judge_relations(knowledge, candidates)
            await self._apply_decision(knowledge, llm_result)
        except Exception as e:
            print(f"[KnowledgeProcessor] 处理 {kid} 失败: {e},回退到 pending")
            try:
                pg_store.update(kid, {"status": "pending", "updated_at": int(time.time())})
            except Exception:
                pass

    async def _llm_judge_relations(self, new_knowledge: dict, candidates: list) -> dict:
        """Ask the dedup LLM how the new entry relates to each candidate.

        Retries up to 3 times; on total failure falls back to an approval with
        no relations (fail-open).
        """
        existing_list = "\n".join([
            f"[{i+1}] ID: {c['id']} | Task: {c['task']} | Content: {c['content'][:300]}"
            for i, c in enumerate(candidates)
        ])
        prompt = DEDUP_RELATION_PROMPT.format(
            new_task=new_knowledge["task"],
            new_content=new_knowledge["content"],
            existing_list=existing_list
        )
        for attempt in range(3):
            try:
                response = await _dedup_llm(
                    messages=[{"role": "user", "content": prompt}],
                )
                content = response.get("content", "").strip()
                # Strip markdown code fences: take the first fenced segment
                # that parses as JSON and contains "final_decision".
                if "```" in content:
                    parts = content.split("```")
                    for part in parts:
                        part = part.strip()
                        if part.startswith("json"):
                            part = part[4:].strip()
                        try:
                            result = json.loads(part)
                            if "final_decision" in result:
                                content = part
                                break
                        except Exception:
                            continue
                result = json.loads(content)
                # NOTE(review): assert doubles as response validation here; it
                # is stripped under `python -O` — consider raising instead.
                assert result.get("final_decision") in ("approved", "rejected")
                return result
            except Exception as e:
                print(f"[LLM Judge] 第{attempt+1}次失败: {e}")
                if attempt < 2:
                    await asyncio.sleep(1)
        return {"final_decision": "approved", "relations": []}

    async def _apply_decision(self, new_knowledge: dict, llm_result: dict):
        """Persist the LLM's dedup verdict: relations, eval credit, status."""
        kid = new_knowledge["id"]
        final_decision = llm_result.get("final_decision", "approved")
        relations = llm_result.get("relations", [])
        now = int(time.time())
        # Hard rule: any duplicate/subset relation forces rejection regardless
        # of what the LLM decided.
        if any(rel.get("type") in ("duplicate", "subset") for rel in relations):
            final_decision = "rejected"
        if final_decision == "rejected":
            # Still record the rejected entry's relations in knowledge_relation.
            for rel in relations:
                old_id = rel.get("old_id")
                rel_type = rel.get("type", "none")
                if old_id and rel_type != "none":
                    pg_store.add_relation(kid, old_id, rel_type)
                # Credit the surviving entry: a duplicate/subset submission
                # counts as a "helpful" signal for the older knowledge.
                if rel_type in ("duplicate", "subset") and old_id:
                    try:
                        old = pg_store.get_by_id(old_id)
                        if not old:
                            continue
                        eval_data = old.get("eval") or {}
                        eval_data["helpful"] = eval_data.get("helpful", 0) + 1
                        helpful_history = eval_data.get("helpful_history") or []
                        helpful_history.append({
                            "source": "dedup",
                            "related_id": kid,
                            "relation_type": rel_type,
                            "timestamp": now
                        })
                        eval_data["helpful_history"] = helpful_history
                        pg_store.update(old_id, {"eval": eval_data, "updated_at": now})
                    except Exception as e:
                        print(f"[Apply Decision] 更新旧知识 {old_id} helpful 失败: {e}")
            pg_store.update(kid, {"status": "rejected", "updated_at": now})
        else:
            # Approved at this stage: persist forward (and optional reverse)
            # relations, then hand the entry to the tool-analysis phase.
            for rel in relations:
                rel_type = rel.get("type", "none")
                reverse_type = rel.get("reverse_type", "none")
                old_id = rel.get("old_id")
                if not old_id or rel_type == "none":
                    continue
                pg_store.add_relation(kid, old_id, rel_type)
                self._relation_cache.add_relation(rel_type, kid)
                self._relation_cache.add_relation(rel_type, old_id)
                if reverse_type and reverse_type != "none":
                    try:
                        pg_store.add_relation(old_id, kid, reverse_type)
                        self._relation_cache.add_relation(reverse_type, old_id)
                        self._relation_cache.add_relation(reverse_type, kid)
                    except Exception as e:
                        print(f"[Apply Decision] 写入反向关系 {old_id} 失败: {e}")
            pg_store.update(kid, {
                "status": "dedup_passed",
                "updated_at": now
            })

    async def _llm_analyze_tools(self, knowledge: dict) -> dict:
        """Ask the tool-analysis LLM which tools the knowledge involves
        (reuses the migration-script logic). Raises on LLM/parse failure."""
        task = (knowledge.get("task") or "")[:600]
        content = (knowledge.get("content") or "")[:1200]
        prompt = TOOL_ANALYSIS_PROMPT.format(task=task, content=content)
        try:
            response = await _tool_analysis_llm(
                messages=[{"role": "user", "content": prompt}],
                max_tokens=2048,
                temperature=0.1,
            )
            raw = (response.get("content") or "").strip()
            # Strip a surrounding markdown code fence, keeping only the
            # fenced body.
            if raw.startswith("```"):
                lines = raw.split("\n")
                inner = []
                in_block = False
                for line in lines:
                    if line.startswith("```"):
                        in_block = not in_block
                        continue
                    if in_block:
                        inner.append(line)
                raw = "\n".join(inner).strip()
            return json.loads(raw)
        except Exception as e:
            print(f"[Tool Analysis LLM] 调用失败: {e}")
            raise

    async def _create_or_get_tool_resource(self, tool_info: dict) -> Optional[str]:
        """Create (or fetch an existing) tool row in the PostgreSQL tool table.

        Returns the tool id "tools/{category}/{slug}", or None when the LLM
        gave no slug.
        """
        category = tool_info.get("category", "other")
        slug = tool_info.get("slug", "")
        if not slug:
            return None
        tool_id = f"tools/{category}/{slug}"
        now_ts = int(time.time())
        cursor = pg_store._get_cursor()
        try:
            # Idempotent: reuse the row if the tool already exists.
            cursor.execute("SELECT id FROM tool WHERE id = %s", (tool_id,))
            if cursor.fetchone():
                return tool_id
            cursor.execute("""
                INSERT INTO tool (id, name, version, introduction, tutorial, input, output,
                                  updated_time, status)
                VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)
            """, (
                tool_id,
                tool_info.get("name", slug),
                tool_info.get("version") or None,
                tool_info.get("description", ""),
                tool_info.get("usage", ""),
                json.dumps(tool_info.get("input", "")),
                json.dumps(tool_info.get("output", "")),
                now_ts,
                tool_info.get("status", "未接入"),
            ))
            pg_store.conn.commit()
            print(f"[Tool Resource] 创建新工具: {tool_id}")
            return tool_id
        finally:
            cursor.close()

    async def _update_tool_knowledge_index(self, tool_id: str, knowledge_id: str):
        """Link a knowledge entry to a tool (writes the tool_knowledge join table)."""
        pg_tool_store.add_knowledge(tool_id, knowledge_id)

    async def _analyze_tool_relation(self, knowledge: dict):
        """Analyze a dedup_passed entry's tool involvement and finalize its status."""
        kid = knowledge["id"]
        now = int(time.time())
        # Optimistic lock: dedup_passed -> analyzing.
        try:
            pg_store.update(kid, {"status": "analyzing", "updated_at": now})
        except Exception as e:
            print(f"[Tool Analysis] 锁定 {kid} 失败: {e}")
            return
        try:
            tool_analysis = await self._llm_analyze_tools(knowledge)
            has_tools = bool(tool_analysis and tool_analysis.get("has_tools"))
            existing_tags = knowledge.get("tags") or {}
            has_tool_tag = existing_tags.get("tool") is True
            # Case 1: LLM says "no tools" but the entry carries a tool tag ->
            # re-run the analysis once to double-check.
            if not has_tools and has_tool_tag:
                print(f"[Tool Analysis] {kid} LLM 判定无工具但有 tool tag,重新分析")
                tool_analysis = await self._llm_analyze_tools(knowledge)
                has_tools = bool(tool_analysis and tool_analysis.get("has_tools"))
                # Still inconsistent after the retry -> entry is ambiguous, reject.
                if not has_tools:
                    pg_store.update(kid, {"status": "rejected", "updated_at": now})
                    print(f"[Tool Analysis] {kid} 两次判定不一致,知识模糊,rejected")
                    return
            # Case 2: no tools and no tool tag -> approve directly.
            if not has_tools:
                pg_store.update(kid, {"status": "approved", "updated_at": now})
                return
            # Cases 3/4: tools involved -> create/fetch each tool, then link.
            tool_ids = []
            for tool_info in (tool_analysis.get("tools") or []):
                tool_id = await self._create_or_get_tool_resource(tool_info)
                if tool_id:
                    tool_ids.append(tool_id)
            updates: dict = {
                "status": "approved",
                "updated_at": now
            }
            # Tools present but tag missing -> add the tag.
            if not has_tool_tag:
                updated_tags = dict(existing_tags)
                updated_tags["tool"] = True
                updates["tags"] = updated_tags
                print(f"[Tool Analysis] {kid} 添加 tool tag")
            pg_store.update(kid, updates)
            # Write the tool_knowledge associations.
            for tool_id in tool_ids:
                await self._update_tool_knowledge_index(tool_id, kid)
            print(f"[Tool Analysis] {kid} 关联了 {len(tool_ids)} 个工具")
        except Exception as e:
            print(f"[Tool Analysis] {kid} 分析失败: {e},回退到 dedup_passed")
            try:
                pg_store.update(kid, {"status": "dedup_passed", "updated_at": int(time.time())})
            except Exception:
                pass
async def _periodic_processor():
    """Every 60s, detect timed-out items and roll them back:
    processing (>5 min) → pending, analyzing (>10 min) → dedup_passed."""
    while True:
        await asyncio.sleep(60)
        try:
            now = int(time.time())
            # Roll back timed-out processing items (5 minutes → pending)
            timeout_5min = now - 300
            processing = pg_store.query('status == "processing"', limit=200)
            for item in processing:
                updated_at = item.get("updated_at", 0) or 0
                # Normalize millisecond timestamps to seconds
                updated_at_sec = updated_at // 1000 if updated_at > 1_000_000_000_000 else updated_at
                if updated_at_sec < timeout_5min:
                    print(f"[Periodic] 回滚超时 processing → pending: {item['id']}")
                    pg_store.update(item["id"], {"status": "pending", "updated_at": int(time.time())})
            # Roll back timed-out analyzing items (10 minutes → dedup_passed)
            timeout_10min = now - 600
            analyzing = pg_store.query('status == "analyzing"', limit=200)
            for item in analyzing:
                updated_at = item.get("updated_at", 0) or 0
                updated_at_sec = updated_at // 1000 if updated_at > 1_000_000_000_000 else updated_at
                if updated_at_sec < timeout_10min:
                    print(f"[Periodic] 回滚超时 analyzing → dedup_passed: {item['id']}")
                    pg_store.update(item["id"], {"status": "dedup_passed", "updated_at": int(time.time())})
        except Exception as e:
            # Keep the loop alive; a single failed sweep is logged and retried
            print(f"[Periodic] 定时任务错误: {e}")
# --- App ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan: open every PostgreSQL store and the dedup processor
    at startup, start the periodic watchdog task, and tear everything down
    at shutdown."""
    global pg_store, pg_resource_store, pg_tool_store, pg_capability_store, pg_requirement_store, pg_strategy_store, knowledge_processor
    # Initialize PostgreSQL stores (knowledge + resources + tools + capabilities + requirements + strategy)
    pg_store = PostgreSQLStore()
    pg_resource_store = PostgreSQLResourceStore()
    pg_tool_store = PostgreSQLToolStore()
    pg_capability_store = PostgreSQLCapabilityStore()
    pg_requirement_store = PostgreSQLRequirementStore()
    pg_strategy_store = PostgreSQLStrategyStore()
    # Create the dedup processor and start the periodic fallback task
    knowledge_processor = KnowledgeProcessor()
    periodic_task = asyncio.create_task(_periodic_processor())
    yield
    # Shutdown: stop the watchdog and release every DB connection
    periodic_task.cancel()
    try:
        await periodic_task
    except asyncio.CancelledError:
        pass
    pg_store.close()
    pg_resource_store.close()
    pg_tool_store.close()
    pg_capability_store.close()
    pg_requirement_store.close()
    pg_strategy_store.close()
app = FastAPI(title=BRAND_NAME, lifespan=lifespan)
# Mount the built frontend's static assets (only when the dist directory exists)
STATIC_DIR = Path(__file__).parent / "frontend" / "dist"
if STATIC_DIR.exists():
    app.mount("/assets", StaticFiles(directory=str(STATIC_DIR / "assets")), name="assets")
  683. # 提供 itemsets_all.json
  684. @app.get("/itemsets_all.json")
  685. async def serve_itemsets():
  686. itemsets_file = STATIC_DIR / "itemsets_all.json"
  687. if itemsets_file.exists():
  688. return FileResponse(itemsets_file)
  689. return {"error": "itemsets_all.json not found"}
# --- Data-version context middleware ---
# Reads the version from the X-KnowHub-Version header or the ?version=xxx
# query param and stores it in a contextvar; store queries across the whole
# request chain read the active version from that contextvar for filtering.
_ALLOWED_VERSIONS = {'dev_abstract', 'dev_dedup', 'tao_dev', 'v0'}
@app.middleware("http")
async def set_active_version(request: Request, call_next):
    """Resolve the active data version for this request and echo it back."""
    v = request.headers.get('X-KnowHub-Version') or request.query_params.get('version') or _DEFAULT_VERSION
    # Unknown versions silently fall back to the default
    if v not in _ALLOWED_VERSIONS:
        v = _DEFAULT_VERSION
    _set_active_version(v)
    response = await call_next(request)
    response.headers['X-KnowHub-Version'] = v
    return response
# --- Cache auto-invalidation middleware ---
# Any successful write (POST/PATCH/PUT/DELETE) against a core entity
# automatically clears the matching in-memory caches.
_DASHBOARD_INVALIDATE_PREFIXES = ("/api/requirement", "/api/capability", "/api/tool", "/api/strategy", "/api/knowledge")
@app.middleware("http")
async def auto_invalidate_caches(request: Request, call_next):
    """Invalidate dashboard/resource caches after successful write requests."""
    response = await call_next(request)
    if request.method in ("POST", "PATCH", "PUT", "DELETE") and response.status_code < 400:
        path = request.url.path
        if any(path.startswith(p) for p in _DASHBOARD_INVALIDATE_PREFIXES):
            _invalidate_dashboard_cache()
        # /api/resource/batch is a read endpoint despite using POST, so it
        # is excluded from resource-cache invalidation
        if path.startswith("/api/resource") and not path.endswith("/batch"):
            _invalidate_resource_cache()
    return response
  716. # --- Knowledge API ---
  717. # --- Resource Batch API ---
  718. # --- Resource 缓存(与 Dashboard 同 TTL,写入时失效) ---
  719. _resource_cache: Dict[str, dict] = {}
  720. _resource_cache_ts: float = 0
  721. def _invalidate_resource_cache():
  722. global _resource_cache, _resource_cache_ts
  723. _resource_cache.clear()
  724. _resource_cache_ts = 0
def _get_cached_resource(rid: str) -> Optional[dict]:
    """Fetch a resource from the in-memory cache; on miss, load it from the
    DB and cache it.

    The whole cache is dropped once it is older than _DASHBOARD_CACHE_TTL
    (coarse, cache-wide expiry rather than per-entry expiry).
    """
    global _resource_cache_ts
    now = time.time()
    if now - _resource_cache_ts > _DASHBOARD_CACHE_TTL:
        _resource_cache.clear()
        _resource_cache_ts = now
    if rid in _resource_cache:
        return _resource_cache[rid]
    row = pg_resource_store.get_by_id(rid)
    if not row:
        # Misses are not cached; a later call hits the DB again
        return None
    entry = {
        "id": row["id"],
        "title": row["title"],
        "body": row["body"],
        "content_type": row.get("content_type", "text"),
        "metadata": row.get("metadata", {}),
        "images": row.get("images", []),
    }
    _resource_cache[rid] = entry
    return entry
  747. @app.post("/api/resource/batch")
  748. def batch_get_resources(body: dict = Body(...)):
  749. """批量获取 resource 基本信息(不含 toc/children/siblings 导航),用于 Dashboard 等场景。
  750. 带后端内存缓存(24h TTL,resource 写入时失效)。"""
  751. ids = body.get("ids", [])
  752. if not ids:
  753. return {"resources": {}}
  754. resources: Dict[str, dict] = {}
  755. for rid in ids:
  756. try:
  757. entry = _get_cached_resource(rid)
  758. if entry:
  759. resources[rid] = entry
  760. except Exception as e:
  761. print(f"[batch_get_resources] Failed to fetch {rid}: {e}")
  762. return {"resources": resources}
  763. @app.post("/api/resource", status_code=201)
  764. def submit_resource(resource: ResourceIn):
  765. """提交资源(存入 PostgreSQL resources 表)"""
  766. try:
  767. # 加密敏感内容
  768. encrypted_secure_body = encrypt_content(resource.id, resource.secure_body)
  769. pg_resource_store.insert_or_update({
  770. 'id': resource.id,
  771. 'title': resource.title,
  772. 'body': resource.body,
  773. 'secure_body': encrypted_secure_body,
  774. 'content_type': resource.content_type,
  775. 'metadata': resource.metadata,
  776. 'sort_order': resource.sort_order,
  777. 'submitted_by': resource.submitted_by
  778. })
  779. return {"status": "ok", "id": resource.id}
  780. except Exception as e:
  781. raise HTTPException(status_code=500, detail=str(e))
  782. @app.get("/api/resource/{resource_id:path}", response_model=ResourceOut)
  783. def get_resource(resource_id: str, x_org_key: Optional[str] = Header(None)):
  784. """获取资源详情(从 PostgreSQL)"""
  785. try:
  786. row = pg_resource_store.get_by_id(resource_id)
  787. if not row:
  788. raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")
  789. # 解密敏感内容
  790. secure_body = decrypt_content(resource_id, row.get("secure_body", ""), x_org_key)
  791. # 计算导航上下文
  792. root_id = resource_id.split("/")[0] if "/" in resource_id else resource_id
  793. # TOC (根节点)
  794. toc = None
  795. if "/" in resource_id:
  796. toc_row = pg_resource_store.get_by_id(root_id)
  797. if toc_row:
  798. toc = ResourceNode(id=toc_row["id"], title=toc_row["title"])
  799. # Children (子节点)
  800. children_rows = pg_resource_store.list_resources(prefix=f"{resource_id}/", limit=1000)
  801. children = [ResourceNode(id=r["id"], title=r["title"]) for r in children_rows
  802. if r["id"].count("/") == resource_id.count("/") + 1]
  803. # Prev/Next (同级节点)
  804. prev_node, next_node = pg_resource_store.get_siblings(resource_id)
  805. prev = ResourceNode(id=prev_node["id"], title=prev_node["title"]) if prev_node else None
  806. next = ResourceNode(id=next_node["id"], title=next_node["title"]) if next_node else None
  807. return ResourceOut(
  808. id=row["id"],
  809. title=row["title"],
  810. body=row["body"],
  811. secure_body=secure_body,
  812. content_type=row["content_type"],
  813. metadata=row.get("metadata", {}),
  814. images=row.get("images", []),
  815. toc=toc,
  816. children=children,
  817. prev=prev,
  818. next=next,
  819. )
  820. except HTTPException:
  821. raise
  822. except Exception as e:
  823. raise HTTPException(status_code=500, detail=str(e))
  824. @app.patch("/api/resource/{resource_id:path}")
  825. def patch_resource(resource_id: str, patch: ResourcePatchIn):
  826. """更新resource字段(PostgreSQL)"""
  827. try:
  828. # 检查是否存在
  829. if not pg_resource_store.get_by_id(resource_id):
  830. raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")
  831. # 构建更新字典
  832. updates = {}
  833. if patch.title is not None:
  834. updates['title'] = patch.title
  835. if patch.body is not None:
  836. updates['body'] = patch.body
  837. if patch.secure_body is not None:
  838. updates['secure_body'] = encrypt_content(resource_id, patch.secure_body)
  839. if patch.content_type is not None:
  840. updates['content_type'] = patch.content_type
  841. if patch.metadata is not None:
  842. updates['metadata'] = patch.metadata
  843. if not updates:
  844. return {"status": "ok", "message": "No fields to update"}
  845. pg_resource_store.update(resource_id, updates)
  846. return {"status": "ok", "id": resource_id}
  847. except HTTPException:
  848. raise
  849. except Exception as e:
  850. raise HTTPException(status_code=500, detail=str(e))
  851. @app.get("/api/resource")
  852. def list_resources(
  853. content_type: Optional[str] = Query(None),
  854. limit: int = Query(100, ge=1, le=1000)
  855. ):
  856. """列出所有resource(PostgreSQL)"""
  857. try:
  858. results = pg_resource_store.list_resources(
  859. content_type=content_type,
  860. limit=limit
  861. )
  862. return {"results": results, "count": len(results)}
  863. except Exception as e:
  864. raise HTTPException(status_code=500, detail=str(e))
  865. @app.delete("/api/resource/{resource_id:path}")
  866. def delete_resource(resource_id: str):
  867. """删除单个resource(PostgreSQL)"""
  868. try:
  869. if not pg_resource_store.get_by_id(resource_id):
  870. raise HTTPException(status_code=404, detail=f"Resource not found: {resource_id}")
  871. pg_resource_store.delete(resource_id)
  872. return {"status": "ok", "id": resource_id}
  873. except HTTPException:
  874. raise
  875. except Exception as e:
  876. raise HTTPException(status_code=500, detail=str(e))
  877. # --- Knowledge API ---
  878. # ===== Knowledge API =====
  879. async def _llm_rerank(query: str, candidates: list[dict], top_k: int) -> list[str]:
  880. """
  881. 使用 LLM 对候选知识进行精排
  882. Args:
  883. query: 查询文本
  884. candidates: 候选知识列表
  885. top_k: 返回数量
  886. Returns:
  887. 排序后的知识 ID 列表
  888. """
  889. if not candidates:
  890. return []
  891. # 构造 prompt
  892. candidates_text = "\n".join([
  893. f"[{i+1}] ID: {c['id']}\nTask: {c['task']}\nContent: {c['content'][:200]}..."
  894. for i, c in enumerate(candidates)
  895. ])
  896. prompt = RERANK_PROMPT_TEMPLATE.format(
  897. top_k=top_k,
  898. query=query,
  899. candidates_text=candidates_text
  900. )
  901. try:
  902. response = await _dedup_llm(
  903. messages=[{"role": "user", "content": prompt}],
  904. )
  905. content = response.get("content", "").strip()
  906. # 解析 ID 列表
  907. selected_ids = [
  908. idx.strip()
  909. for idx in re.split(r'[,\s]+', content)
  910. if idx.strip().startswith(("knowledge-", "research-"))
  911. ]
  912. return selected_ids[:top_k]
  913. except Exception as e:
  914. print(f"[LLM Rerank] 失败: {e}")
  915. return []
# --- Knowledge Ask / Upload API (Librarian Agent HTTP endpoints) ---
class AgentRequest(BaseModel):
    """POST /api/agent request body (unified remote-agent entry point)."""
    agent_type: str  # required; must carry the remote_ prefix
    task: str  # required; single-task description
    messages: Optional[list[dict]] = None  # pre-seeded OpenAI-format messages
    continue_from: Optional[str] = None  # existing sub_trace_id; resumes that trace when given
    skills: Optional[list[str]] = None  # caller-selected skills; server filters by the agent_type's whitelist
class AgentResponse(BaseModel):
    """POST /api/agent response body (standard agent result)."""
    sub_trace_id: Optional[str] = None  # trace id usable as continue_from for a follow-up run
    status: str  # run outcome reported by the handler
    summary: str = ""
    stats: dict = {}
    error: Optional[str] = None  # populated on failure
# agent_type → handler mapping. Handler signature: (query, continue_from, skills) -> dict
# Register new remote agents here.
_REMOTE_AGENT_DISPATCH = {}
def _get_remote_agent_dispatch():
    """Lazily import agent implementations, avoiding circular imports and
    loading heavyweight dependencies at startup. Populates the dispatch
    table on first call and returns it."""
    global _REMOTE_AGENT_DISPATCH
    if not _REMOTE_AGENT_DISPATCH:
        from agents.librarian import run_librarian
        from agents.research import research as _research
        _REMOTE_AGENT_DISPATCH = {
            "remote_librarian": run_librarian,
            "remote_research": _research,
        }
    return _REMOTE_AGENT_DISPATCH
  945. @app.post("/api/agent", response_model=AgentResponse)
  946. async def agent_api(req: AgentRequest):
  947. """
  948. 统一远端 Agent 入口。按 agent_type 分发到对应实现。
  949. 全部同步:Agent 运行完成后返回标准 {status, sub_trace_id, summary, stats}。
  950. 续跑由 caller 传入 continue_from 显式指定(服务器不维护 caller→trace 映射)。
  951. Skills 由 caller 指定,每个 handler 用自己的白名单过滤。
  952. """
  953. if not req.agent_type.startswith("remote_"):
  954. raise HTTPException(
  955. status_code=400,
  956. detail=f"agent_type 必须以 'remote_' 开头,收到: {req.agent_type}",
  957. )
  958. dispatch = _get_remote_agent_dispatch()
  959. handler = dispatch.get(req.agent_type)
  960. if handler is None:
  961. raise HTTPException(
  962. status_code=404,
  963. detail=f"未注册的 agent_type: {req.agent_type},可用: {list(dispatch.keys())}",
  964. )
  965. try:
  966. result = await handler(
  967. query=req.task,
  968. continue_from=req.continue_from,
  969. skills=req.skills,
  970. )
  971. return AgentResponse(**result)
  972. except Exception as e:
  973. import traceback
  974. traceback.print_exc()
  975. print(f"[/api/agent] {req.agent_type} 错误: {e}")
  976. raise HTTPException(status_code=500, detail=str(e))
  977. @app.get("/api/knowledge/upload/pending")
  978. async def list_pending_uploads_api():
  979. """列出所有未处理或失败的 upload 任务(用于排查和重跑)"""
  980. from agents.librarian import list_pending_uploads
  981. pending = list_pending_uploads()
  982. return {"pending": pending, "count": len(pending)}
  983. @app.post("/api/knowledge/upload/retry")
  984. async def retry_pending_uploads_api():
  985. """重跑所有失败的 upload 任务(同步执行,按顺序处理)"""
  986. from agents.librarian import list_pending_uploads, run_librarian
  987. pending = list_pending_uploads()
  988. failed = [p for p in pending if p["status"] == "failed"]
  989. results = []
  990. for item in failed:
  991. buffer_file = item["file"]
  992. data = json.loads(Path(buffer_file).read_text(encoding="utf-8"))
  993. payload = data.get("data", {})
  994. result = await run_librarian(
  995. query=json.dumps(payload),
  996. continue_from=None,
  997. skills=["upload_strategy"],
  998. )
  999. results.append({"file": buffer_file, "status": result.get("status"), "error": result.get("error")})
  1000. return {"retried": len(failed), "results": results}
  1001. @app.get("/api/knowledge/search")
  1002. async def search_knowledge_api(
  1003. q: str = Query(..., description="查询文本"),
  1004. top_k: int = Query(default=5, ge=1, le=20),
  1005. min_score: int = Query(default=3, ge=1, le=5),
  1006. types: Optional[str] = None,
  1007. owner: Optional[str] = None,
  1008. requirement_id: Optional[str] = None,
  1009. capability_id: Optional[str] = None,
  1010. tool_id: Optional[str] = None
  1011. ):
  1012. """检索知识(向量召回 + LLM 精排)"""
  1013. try:
  1014. # 1. 生成查询向量
  1015. query_embedding = await get_embedding(q)
  1016. # 2. 构建过滤表达式
  1017. filters = []
  1018. if types:
  1019. type_list = [t.strip() for t in types.split(',') if t.strip()]
  1020. if len(type_list) == 1:
  1021. filters.append(f'array_contains(types, "{type_list[0]}")')
  1022. elif len(type_list) > 1:
  1023. type_filters = [f'array_contains(types, "{t}")' for t in type_list]
  1024. filters.append(f'({" or ".join(type_filters)})')
  1025. if owner:
  1026. owner_list = [o.strip() for o in owner.split(',') if o.strip()]
  1027. if len(owner_list) == 1:
  1028. filters.append(f'owner == "{owner_list[0]}"')
  1029. else:
  1030. # 多个owner用OR连接
  1031. owner_filters = [f'owner == "{o}"' for o in owner_list]
  1032. filters.append(f'({" or ".join(owner_filters)})')
  1033. # 添加 min_score 过滤
  1034. filters.append(f'eval["score"] >= {min_score}')
  1035. # 只搜索 approved 和 checked 的知识
  1036. filters.append('(status == "approved" or status == "checked")')
  1037. filter_expr = ' and '.join(filters) if filters else None
  1038. relation_filters = {}
  1039. if requirement_id: relation_filters['requirement_id'] = requirement_id
  1040. if capability_id: relation_filters['capability_id'] = capability_id
  1041. if tool_id: relation_filters['tool_id'] = tool_id
  1042. # 3. 向量召回(3*k 个候选)
  1043. recall_limit = top_k * 3
  1044. candidates = pg_store.search(
  1045. query_embedding=query_embedding,
  1046. filters=filter_expr,
  1047. limit=recall_limit,
  1048. relation_filters=relation_filters
  1049. )
  1050. if not candidates:
  1051. return {"results": [], "count": 0, "reranked": False}
  1052. # 转换为可序列化的格式
  1053. serialized_candidates = [to_serializable(c) for c in candidates]
  1054. # 为了保证搜索的极致速度,直接返回向量召回的 top-k(跳过缓慢的 LLM 精排)
  1055. return {"results": serialized_candidates[:top_k], "count": len(serialized_candidates[:top_k]), "reranked": False}
  1056. except Exception as e:
  1057. print(f"[Knowledge Search] 错误: {e}")
  1058. raise HTTPException(status_code=500, detail=str(e))
  1059. @app.post("/api/knowledge", status_code=201)
  1060. async def save_knowledge(knowledge: KnowledgeIn, background_tasks: BackgroundTasks):
  1061. """保存新知识"""
  1062. try:
  1063. # 生成 ID
  1064. timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
  1065. random_suffix = uuid.uuid4().hex[:4]
  1066. knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
  1067. now = int(time.time())
  1068. # 设置默认值
  1069. owner = knowledge.owner or f"agent:{knowledge.source.get('agent_id', 'unknown')}"
  1070. # 准备 source
  1071. source = {
  1072. "name": knowledge.source.get("name", ""),
  1073. "category": knowledge.source.get("category", ""),
  1074. "urls": knowledge.source.get("urls", []),
  1075. "agent_id": knowledge.source.get("agent_id", "unknown"),
  1076. "submitted_by": knowledge.source.get("submitted_by", ""),
  1077. "timestamp": datetime.now(timezone.utc).isoformat(),
  1078. "message_id": knowledge.message_id
  1079. }
  1080. # 准备 eval
  1081. eval_data = {
  1082. "score": knowledge.eval.get("score", 3),
  1083. "helpful": knowledge.eval.get("helpful", 1),
  1084. "harmful": knowledge.eval.get("harmful", 0),
  1085. "confidence": knowledge.eval.get("confidence", 0.5),
  1086. "helpful_history": [],
  1087. "harmful_history": []
  1088. }
  1089. # 生成向量(task_embedding + content_embedding 双向量)
  1090. task_embedding = await get_embedding(knowledge.task)
  1091. content_embedding = await get_embedding(knowledge.content)
  1092. # 提取 tag keys(用于高效筛选)
  1093. tag_keys = list(knowledge.tags.keys()) if isinstance(knowledge.tags, dict) else []
  1094. # 准备插入数据
  1095. insert_data = {
  1096. "id": knowledge_id,
  1097. "task_embedding": task_embedding,
  1098. "content_embedding": content_embedding,
  1099. "message_id": knowledge.message_id,
  1100. "task": knowledge.task,
  1101. "content": knowledge.content,
  1102. "types": knowledge.types,
  1103. "tags": knowledge.tags,
  1104. "tag_keys": tag_keys,
  1105. "scopes": knowledge.scopes,
  1106. "owner": owner,
  1107. "source": source,
  1108. "eval": eval_data,
  1109. "created_at": now,
  1110. "updated_at": now,
  1111. "status": "pending",
  1112. "capability_ids": knowledge.capability_ids,
  1113. "tool_ids": knowledge.tool_ids,
  1114. "resource_ids": knowledge.resource_ids,
  1115. }
  1116. print(f"[Save Knowledge] 插入数据: {json.dumps({k: v for k, v in insert_data.items() if k != 'embedding'}, ensure_ascii=False)}")
  1117. # 插入 Milvus
  1118. pg_store.insert(insert_data)
  1119. # 触发后台去重处理
  1120. background_tasks.add_task(knowledge_processor.process_pending)
  1121. return {"status": "pending", "knowledge_id": knowledge_id, "message": "知识已入队,正在处理去重..."}
  1122. except Exception as e:
  1123. print(f"[Save Knowledge] 错误: {e}")
  1124. raise HTTPException(status_code=500, detail=str(e))
  1125. @app.get("/api/knowledge")
  1126. def list_knowledge(
  1127. page: int = Query(default=1, ge=1),
  1128. page_size: int = Query(default=20, ge=1, le=1000),
  1129. types: Optional[str] = None,
  1130. scopes: Optional[str] = None,
  1131. owner: Optional[str] = None,
  1132. tags: Optional[str] = None,
  1133. status: Optional[str] = None,
  1134. requirement_id: Optional[str] = None,
  1135. capability_id: Optional[str] = None,
  1136. tool_id: Optional[str] = None
  1137. ):
  1138. """列出知识(支持后端筛选和分页)"""
  1139. try:
  1140. # 构建过滤表达式
  1141. filters = []
  1142. # types 支持多个,改为用 OR 连接(并集:包含任意选中 type 即可)
  1143. if types:
  1144. type_list = [t.strip() for t in types.split(',') if t.strip()]
  1145. if len(type_list) == 1:
  1146. filters.append(f'array_contains(types, "{type_list[0]}")')
  1147. elif len(type_list) > 1:
  1148. type_filters = [f'array_contains(types, "{t}")' for t in type_list]
  1149. filters.append(f'({" or ".join(type_filters)})')
  1150. if scopes:
  1151. filters.append(f'array_contains(scopes, "{scopes}")')
  1152. if owner:
  1153. owner_list = [o.strip() for o in owner.split(',') if o.strip()]
  1154. if len(owner_list) == 1:
  1155. filters.append(f'owner == "{owner_list[0]}"')
  1156. else:
  1157. # 多个owner用OR连接
  1158. owner_filters = [f'owner == "{o}"' for o in owner_list]
  1159. filters.append(f'({" or ".join(owner_filters)})')
  1160. # tags 支持多个,用 AND 连接(使用 tag_keys 数组进行高效筛选)
  1161. if tags:
  1162. tag_list = [t.strip() for t in tags.split(',') if t.strip()]
  1163. for t in tag_list:
  1164. filters.append(f'array_contains(tag_keys, "{t}")')
  1165. # 只返回指定 status 的知识(默认 approved 和 checked)
  1166. status_list = [s.strip() for s in (status or "approved,checked").split(',') if s.strip()]
  1167. status_conditions = ' or '.join([f'status == "{s}"' for s in status_list])
  1168. filters.append(f'({status_conditions})')
  1169. # 如果没有过滤条件,查询所有
  1170. filter_expr = ' and '.join(filters) if filters else 'id != ""'
  1171. relation_filters = {}
  1172. if requirement_id: relation_filters['requirement_id'] = requirement_id
  1173. if capability_id: relation_filters['capability_id'] = capability_id
  1174. if tool_id: relation_filters['tool_id'] = tool_id
  1175. # 查询 Milvus/PG(先获取所有符合条件的数据)
  1176. # limit 是总数限制,我们需要获取足够多的数据来支持分页
  1177. max_limit = 10000 # 设置一个合理的上限
  1178. results = pg_store.query(filter_expr, limit=max_limit, relation_filters=relation_filters)
  1179. # 转换为可序列化的格式
  1180. serialized_results = [to_serializable(r) for r in results]
  1181. # 按 created_at 降序排序(最新的在前)
  1182. serialized_results.sort(key=lambda x: x.get('created_at', 0), reverse=True)
  1183. # 计算分页
  1184. total = len(serialized_results)
  1185. total_pages = (total + page_size - 1) // page_size # 向上取整
  1186. start_idx = (page - 1) * page_size
  1187. end_idx = start_idx + page_size
  1188. page_results = serialized_results[start_idx:end_idx]
  1189. return {
  1190. "results": page_results,
  1191. "pagination": {
  1192. "page": page,
  1193. "page_size": page_size,
  1194. "total": total,
  1195. "total_pages": total_pages
  1196. }
  1197. }
  1198. except Exception as e:
  1199. print(f"[List Knowledge] 错误: {e}")
  1200. raise HTTPException(status_code=500, detail=str(e))
  1201. @app.get("/api/knowledge/meta/tags")
  1202. def get_all_tags():
  1203. """获取所有已有的 tags"""
  1204. try:
  1205. # 查询所有知识
  1206. results = pg_store.query('id != ""', limit=10000)
  1207. all_tags = set()
  1208. for item in results:
  1209. # 转换为标准字典
  1210. serialized_item = to_serializable(item)
  1211. tags_dict = serialized_item.get("tags", {})
  1212. if isinstance(tags_dict, dict):
  1213. for key in tags_dict.keys():
  1214. all_tags.add(key)
  1215. return {"tags": sorted(list(all_tags))}
  1216. except Exception as e:
  1217. print(f"[Get Tags] 错误: {e}")
  1218. raise HTTPException(status_code=500, detail=str(e))
  1219. @app.get("/api/knowledge/pending")
  1220. def get_pending_knowledge(limit: int = Query(default=50, ge=1, le=200)):
  1221. """查询待处理队列(pending + processing + dedup_passed + analyzing)"""
  1222. try:
  1223. pending = pg_store.query(
  1224. 'status == "pending" or status == "processing" or status == "dedup_passed" or status == "analyzing"',
  1225. limit=limit
  1226. )
  1227. serialized = [to_serializable(r) for r in pending]
  1228. return {"results": serialized, "count": len(serialized)}
  1229. except Exception as e:
  1230. print(f"[Pending] 错误: {e}")
  1231. raise HTTPException(status_code=500, detail=str(e))
  1232. @app.post("/api/knowledge/process")
  1233. async def trigger_process(force: bool = Query(default=False)):
  1234. """手动触发去重处理。force=true 时先回滚所有 processing → pending,analyzing → dedup_passed"""
  1235. try:
  1236. if force:
  1237. processing = pg_store.query('status == "processing"', limit=200)
  1238. for item in processing:
  1239. pg_store.update(item["id"], {"status": "pending", "updated_at": int(time.time())})
  1240. print(f"[Manual Process] 回滚 {len(processing)} 条 processing → pending")
  1241. analyzing = pg_store.query('status == "analyzing"', limit=200)
  1242. for item in analyzing:
  1243. pg_store.update(item["id"], {"status": "dedup_passed", "updated_at": int(time.time())})
  1244. print(f"[Manual Process] 回滚 {len(analyzing)} 条 analyzing → dedup_passed")
  1245. asyncio.create_task(knowledge_processor.process_pending())
  1246. return {"status": "ok", "message": "处理任务已触发"}
  1247. except Exception as e:
  1248. print(f"[Manual Process] 错误: {e}")
  1249. raise HTTPException(status_code=500, detail=str(e))
  1250. @app.post("/api/knowledge/migrate")
  1251. async def migrate_knowledge_schema():
  1252. """手动触发 schema 迁移(PostgreSQL不需要此功能)"""
  1253. return {"status": "ok", "message": "PostgreSQL不需要schema迁移"}
  1254. @app.get("/api/knowledge/status/{knowledge_id}")
  1255. def get_knowledge_status(knowledge_id: str):
  1256. """查询单条知识的处理状态和关系"""
  1257. try:
  1258. result = pg_store.get_by_id(knowledge_id)
  1259. if not result:
  1260. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1261. serialized = to_serializable(result)
  1262. return {
  1263. "id": knowledge_id,
  1264. "status": serialized.get("status", "approved"),
  1265. "relations": serialized.get("relations", []),
  1266. "created_at": serialized.get("created_at"),
  1267. "updated_at": serialized.get("updated_at"),
  1268. }
  1269. except HTTPException:
  1270. raise
  1271. except Exception as e:
  1272. print(f"[Knowledge Status] 错误: {e}")
  1273. raise HTTPException(status_code=500, detail=str(e))
  1274. @app.get("/api/knowledge/{knowledge_id}")
  1275. def get_knowledge(knowledge_id: str):
  1276. """获取单条知识"""
  1277. try:
  1278. result = pg_store.get_by_id(knowledge_id)
  1279. if not result:
  1280. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1281. return to_serializable(result)
  1282. except HTTPException:
  1283. raise
  1284. except Exception as e:
  1285. print(f"[Get Knowledge] 错误: {e}")
  1286. raise HTTPException(status_code=500, detail=str(e))
async def _evolve_knowledge_with_llm(old_content: str, feedback: str) -> str:
    """Rewrite knowledge content with the LLM, folding *feedback* into it.

    Falls back to appending the feedback as a dated update section when the
    LLM call fails or returns an implausibly short result.
    """
    prompt = KNOWLEDGE_EVOLVE_PROMPT_TEMPLATE.format(
        old_content=old_content,
        feedback=feedback
    )
    try:
        response = await _dedup_llm(
            messages=[{"role": "user", "content": prompt}],
        )
        evolved = response.get("content", "").strip()
        # Guard against degenerate/empty LLM output
        if len(evolved) < 5:
            raise ValueError("LLM output too short")
        return evolved
    except Exception as e:
        print(f"知识进化失败,采用追加模式回退: {e}")
        return f"{old_content}\n\n---\n[Update {datetime.now().strftime('%Y-%m-%d')}]: {feedback}"
  1304. @app.put("/api/knowledge/{knowledge_id}")
  1305. async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
  1306. """更新知识评估,支持知识进化"""
  1307. try:
  1308. # 获取现有知识
  1309. existing = pg_store.get_by_id(knowledge_id)
  1310. if not existing:
  1311. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1312. eval_data = existing.get("eval", {})
  1313. # 更新评分
  1314. if update.update_score is not None:
  1315. eval_data["score"] = update.update_score
  1316. # 添加有效案例
  1317. if update.add_helpful_case:
  1318. eval_data["helpful"] = eval_data.get("helpful", 0) + 1
  1319. if "helpful_history" not in eval_data:
  1320. eval_data["helpful_history"] = []
  1321. eval_data["helpful_history"].append(update.add_helpful_case)
  1322. # 添加有害案例
  1323. if update.add_harmful_case:
  1324. eval_data["harmful"] = eval_data.get("harmful", 0) + 1
  1325. if "harmful_history" not in eval_data:
  1326. eval_data["harmful_history"] = []
  1327. eval_data["harmful_history"].append(update.add_harmful_case)
  1328. # 知识进化
  1329. content = existing["content"]
  1330. need_reembed = False
  1331. if update.evolve_feedback:
  1332. content = await _evolve_knowledge_with_llm(content, update.evolve_feedback)
  1333. eval_data["helpful"] = eval_data.get("helpful", 0) + 1
  1334. need_reembed = True
  1335. # 准备更新数据
  1336. updates = {
  1337. "content": content,
  1338. "eval": eval_data,
  1339. }
  1340. # 如果内容变化,重新生成向量
  1341. if need_reembed:
  1342. embedding = await get_embedding(existing['task'])
  1343. updates["task_embedding"] = embedding
  1344. # 更新 Milvus
  1345. pg_store.update(knowledge_id, updates)
  1346. return {"status": "ok", "knowledge_id": knowledge_id}
  1347. except HTTPException:
  1348. raise
  1349. except Exception as e:
  1350. print(f"[Update Knowledge] 错误: {e}")
  1351. raise HTTPException(status_code=500, detail=str(e))
  1352. @app.patch("/api/knowledge/{knowledge_id}")
  1353. async def patch_knowledge(knowledge_id: str, patch: KnowledgePatchIn):
  1354. """直接编辑知识字段"""
  1355. try:
  1356. # 获取现有知识
  1357. existing = pg_store.get_by_id(knowledge_id)
  1358. if not existing:
  1359. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1360. updates = {}
  1361. need_reembed = False
  1362. need_content_reembed = False
  1363. if patch.task is not None:
  1364. updates["task"] = patch.task
  1365. need_reembed = True
  1366. if patch.content is not None:
  1367. updates["content"] = patch.content
  1368. need_content_reembed = True
  1369. if patch.types is not None:
  1370. updates["types"] = patch.types
  1371. if patch.tags is not None:
  1372. updates["tags"] = patch.tags
  1373. # 同时更新 tag_keys
  1374. updates["tag_keys"] = list(patch.tags.keys()) if isinstance(patch.tags, dict) else []
  1375. if patch.scopes is not None:
  1376. updates["scopes"] = patch.scopes
  1377. if patch.owner is not None:
  1378. updates["owner"] = patch.owner
  1379. if patch.capability_ids is not None:
  1380. updates["capability_ids"] = patch.capability_ids
  1381. if patch.tool_ids is not None:
  1382. updates["tool_ids"] = patch.tool_ids
  1383. if not updates:
  1384. return {"status": "ok", "knowledge_id": knowledge_id}
  1385. # 如果 task 变化,重新生成 task_embedding
  1386. if need_reembed:
  1387. task = updates.get("task", existing["task"])
  1388. embedding = await get_embedding(task)
  1389. updates["task_embedding"] = embedding
  1390. # 如果 content 变化,重新生成 content_embedding
  1391. if need_content_reembed:
  1392. content = updates.get("content", existing["content"])
  1393. content_embedding = await get_embedding(content)
  1394. updates["content_embedding"] = content_embedding
  1395. # 更新 Milvus
  1396. pg_store.update(knowledge_id, updates)
  1397. return {"status": "ok", "knowledge_id": knowledge_id}
  1398. except HTTPException:
  1399. raise
  1400. except Exception as e:
  1401. print(f"[Patch Knowledge] 错误: {e}")
  1402. raise HTTPException(status_code=500, detail=str(e))
  1403. @app.delete("/api/knowledge/{knowledge_id}")
  1404. def delete_knowledge(knowledge_id: str):
  1405. """删除单条知识"""
  1406. try:
  1407. # 检查知识是否存在
  1408. existing = pg_store.get_by_id(knowledge_id)
  1409. if not existing:
  1410. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1411. # 从 PostgreSQL 删除
  1412. pg_store.delete(knowledge_id)
  1413. print(f"[Delete Knowledge] 已删除知识: {knowledge_id}")
  1414. return {"status": "ok", "knowledge_id": knowledge_id}
  1415. except HTTPException:
  1416. raise
  1417. except Exception as e:
  1418. print(f"[Delete Knowledge] 错误: {e}")
  1419. raise HTTPException(status_code=500, detail=str(e))
  1420. @app.post("/api/knowledge/batch_delete")
  1421. def batch_delete_knowledge(knowledge_ids: List[str] = Body(...)):
  1422. """批量删除知识"""
  1423. try:
  1424. if not knowledge_ids:
  1425. raise HTTPException(status_code=400, detail="knowledge_ids cannot be empty")
  1426. deleted_count = 0
  1427. for kid in knowledge_ids:
  1428. pg_store.delete(kid)
  1429. deleted_count += 1
  1430. print(f"[Batch Delete] 已删除 {deleted_count} 条知识")
  1431. return {"status": "ok", "deleted_count": deleted_count}
  1432. except HTTPException:
  1433. raise
  1434. except Exception as e:
  1435. print(f"[Batch Delete] 错误: {e}")
  1436. raise HTTPException(status_code=500, detail=str(e))
  1437. @app.post("/api/knowledge/batch_verify")
  1438. async def batch_verify_knowledge(batch: KnowledgeBatchVerifyIn):
  1439. """批量验证通过(approved → checked)"""
  1440. if not batch.knowledge_ids:
  1441. return {"status": "ok", "updated": 0}
  1442. try:
  1443. now_iso = datetime.now(timezone.utc).isoformat()
  1444. updated_count = 0
  1445. for kid in batch.knowledge_ids:
  1446. existing = pg_store.get_by_id(kid)
  1447. if not existing:
  1448. continue
  1449. eval_data = existing.get("eval") or {}
  1450. eval_data["verification"] = {
  1451. "status": "checked",
  1452. "verified_by": batch.verified_by,
  1453. "verified_at": now_iso,
  1454. "note": None,
  1455. "issue_type": None,
  1456. "issue_action": None,
  1457. }
  1458. pg_store.update(kid, {"eval": eval_data, "status": "checked", "updated_at": int(time.time())})
  1459. updated_count += 1
  1460. return {"status": "ok", "updated": updated_count}
  1461. except Exception as e:
  1462. print(f"[Batch Verify] 错误: {e}")
  1463. raise HTTPException(status_code=500, detail=str(e))
  1464. @app.post("/api/knowledge/{knowledge_id}/verify")
  1465. async def verify_knowledge(knowledge_id: str, verify: KnowledgeVerifyIn):
  1466. """知识验证:approve 切换 approved↔checked,reject 设为 rejected"""
  1467. try:
  1468. existing = pg_store.get_by_id(knowledge_id)
  1469. if not existing:
  1470. raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
  1471. current_status = existing.get("status", "approved")
  1472. if verify.action == "approve":
  1473. # checked → approved(取消验证),其他 → checked
  1474. new_status = "approved" if current_status == "checked" else "checked"
  1475. pg_store.update(knowledge_id, {
  1476. "status": new_status,
  1477. "updated_at": int(time.time())
  1478. })
  1479. return {"status": "ok", "new_status": new_status,
  1480. "message": "已取消验证" if new_status == "approved" else "验证通过"}
  1481. elif verify.action == "reject":
  1482. pg_store.update(knowledge_id, {
  1483. "status": "rejected",
  1484. "updated_at": int(time.time())
  1485. })
  1486. return {"status": "ok", "new_status": "rejected", "message": "已拒绝"}
  1487. else:
  1488. raise HTTPException(status_code=400, detail=f"Unknown action: {verify.action}")
  1489. except HTTPException:
  1490. raise
  1491. except Exception as e:
  1492. print(f"[Verify Knowledge] 错误: {e}")
  1493. raise HTTPException(status_code=500, detail=str(e))
  1494. @app.post("/api/knowledge/batch_update")
  1495. async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
  1496. """批量反馈知识有效性"""
  1497. if not batch.feedback_list:
  1498. return {"status": "ok", "updated": 0}
  1499. try:
  1500. # 先处理无需进化的,收集需要进化的
  1501. evolution_tasks = [] # [(knowledge_id, old_content, feedback, eval_data)]
  1502. simple_updates = [] # [(knowledge_id, is_effective, eval_data)]
  1503. for item in batch.feedback_list:
  1504. knowledge_id = item.get("knowledge_id")
  1505. is_effective = item.get("is_effective")
  1506. feedback = item.get("feedback", "")
  1507. if not knowledge_id:
  1508. continue
  1509. existing = pg_store.get_by_id(knowledge_id)
  1510. if not existing:
  1511. continue
  1512. eval_data = existing.get("eval", {})
  1513. if is_effective and feedback:
  1514. evolution_tasks.append((knowledge_id, existing["content"], feedback, eval_data, existing["task"]))
  1515. else:
  1516. simple_updates.append((knowledge_id, is_effective, eval_data))
  1517. # 执行简单更新
  1518. for knowledge_id, is_effective, eval_data in simple_updates:
  1519. if is_effective:
  1520. eval_data["helpful"] = eval_data.get("helpful", 0) + 1
  1521. else:
  1522. eval_data["harmful"] = eval_data.get("harmful", 0) + 1
  1523. pg_store.update(knowledge_id, {"eval": eval_data})
  1524. # 并发执行知识进化
  1525. if evolution_tasks:
  1526. print(f"🧬 并发处理 {len(evolution_tasks)} 条知识进化...")
  1527. evolved_results = await asyncio.gather(
  1528. *[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _, _ in evolution_tasks]
  1529. )
  1530. for (knowledge_id, _, _, eval_data, task), evolved_content in zip(evolution_tasks, evolved_results):
  1531. eval_data["helpful"] = eval_data.get("helpful", 0) + 1
  1532. # 重新生成向量(只基于 task)
  1533. embedding = await get_embedding(task)
  1534. pg_store.update(knowledge_id, {
  1535. "content": evolved_content,
  1536. "eval": eval_data,
  1537. "task_embedding": embedding
  1538. })
  1539. return {"status": "ok", "updated": len(simple_updates) + len(evolution_tasks)}
  1540. except Exception as e:
  1541. print(f"[Batch Update] 错误: {e}")
  1542. raise HTTPException(status_code=500, detail=str(e))
  1543. @app.post("/api/knowledge/slim")
  1544. async def slim_knowledge(model: str = "google/gemini-2.5-flash-lite"):
  1545. """知识库瘦身:合并语义相似知识"""
  1546. try:
  1547. # 获取所有知识
  1548. all_knowledge = pg_store.query('id != ""', limit=10000)
  1549. # 转换为可序列化的格式
  1550. all_knowledge = [to_serializable(item) for item in all_knowledge]
  1551. if len(all_knowledge) < 2:
  1552. return {"status": "ok", "message": f"知识库仅有 {len(all_knowledge)} 条,无需瘦身"}
  1553. # 构造发给大模型的内容
  1554. entries_text = ""
  1555. for item in all_knowledge:
  1556. eval_data = item.get("eval", {})
  1557. types = item.get("types", [])
  1558. entries_text += f"[ID: {item['id']}] [Types: {','.join(types)}] "
  1559. entries_text += f"[Helpful: {eval_data.get('helpful', 0)}, Harmful: {eval_data.get('harmful', 0)}] [Score: {eval_data.get('score', 3)}]\n"
  1560. entries_text += f"Task: {item['task']}\n"
  1561. entries_text += f"Content: {item['content'][:200]}...\n\n"
  1562. prompt = KNOWLEDGE_SLIM_PROMPT_TEMPLATE.format(entries_text=entries_text)
  1563. print(f"\n[知识瘦身] 正在调用 {model} 分析 {len(all_knowledge)} 条知识...")
  1564. slim_llm = create_openrouter_llm_call(model=model)
  1565. response = await slim_llm(
  1566. messages=[{"role": "user", "content": prompt}],
  1567. )
  1568. content = response.get("content", "").strip()
  1569. if not content:
  1570. raise HTTPException(status_code=500, detail="LLM 返回为空")
  1571. # 解析大模型输出
  1572. report_line = ""
  1573. new_entries = []
  1574. blocks = [b.strip() for b in content.split("===") if b.strip()]
  1575. for block in blocks:
  1576. if block.startswith("REPORT:"):
  1577. report_line = block
  1578. continue
  1579. lines = block.split("\n")
  1580. kid, types, helpful, harmful, score, task, content_lines = None, [], 0, 0, 3, "", []
  1581. current_field = None
  1582. for line in lines:
  1583. if line.startswith("ID:"):
  1584. kid = line[3:].strip()
  1585. current_field = None
  1586. elif line.startswith("TYPES:"):
  1587. types_str = line[6:].strip()
  1588. types = [t.strip() for t in types_str.split(",") if t.strip()]
  1589. current_field = None
  1590. elif line.startswith("HELPFUL:"):
  1591. try:
  1592. helpful = int(line[8:].strip())
  1593. except Exception:
  1594. helpful = 0
  1595. current_field = None
  1596. elif line.startswith("HARMFUL:"):
  1597. try:
  1598. harmful = int(line[8:].strip())
  1599. except Exception:
  1600. harmful = 0
  1601. current_field = None
  1602. elif line.startswith("SCORE:"):
  1603. try:
  1604. score = int(line[6:].strip())
  1605. except Exception:
  1606. score = 3
  1607. current_field = None
  1608. elif line.startswith("TASK:"):
  1609. task = line[5:].strip()
  1610. current_field = "task"
  1611. elif line.startswith("CONTENT:"):
  1612. content_lines.append(line[8:].strip())
  1613. current_field = "content"
  1614. elif current_field == "task":
  1615. task += "\n" + line
  1616. elif current_field == "content":
  1617. content_lines.append(line)
  1618. if kid and content_lines:
  1619. new_entries.append({
  1620. "id": kid,
  1621. "types": types if types else ["strategy"],
  1622. "helpful": helpful,
  1623. "harmful": harmful,
  1624. "score": score,
  1625. "task": task.strip(),
  1626. "content": "\n".join(content_lines).strip()
  1627. })
  1628. if not new_entries:
  1629. raise HTTPException(status_code=500, detail="解析大模型输出失败")
  1630. # 生成向量并重建知识库
  1631. print(f"[知识瘦身] 正在为 {len(new_entries)} 条知识生成向量...")
  1632. # 批量生成向量(只基于 task)
  1633. texts = [e['task'] for e in new_entries]
  1634. embeddings = await get_embeddings_batch(texts)
  1635. # 清空并重建(PostgreSQL使用TRUNCATE)
  1636. cursor = pg_store._get_cursor()
  1637. try:
  1638. # 先清关联表再清主表
  1639. for jt in ('requirement_knowledge', 'capability_knowledge', 'tool_knowledge',
  1640. 'knowledge_resource', 'knowledge_relation'):
  1641. cursor.execute(f"TRUNCATE TABLE {jt}")
  1642. cursor.execute("TRUNCATE TABLE knowledge")
  1643. pg_store.conn.commit()
  1644. finally:
  1645. cursor.close()
  1646. knowledge_list = []
  1647. for e, embedding in zip(new_entries, embeddings):
  1648. eval_data = {
  1649. "score": e["score"],
  1650. "helpful": e["helpful"],
  1651. "harmful": e["harmful"],
  1652. "confidence": 0.9,
  1653. "helpful_history": [],
  1654. "harmful_history": []
  1655. }
  1656. source = {
  1657. "name": "slim",
  1658. "category": "exp",
  1659. "urls": [],
  1660. "agent_id": "slim",
  1661. "submitted_by": "system",
  1662. "timestamp": datetime.now(timezone.utc).isoformat()
  1663. }
  1664. knowledge_list.append({
  1665. "id": e["id"],
  1666. "task_embedding": embedding,
  1667. "message_id": "",
  1668. "task": e["task"],
  1669. "content": e["content"],
  1670. "types": e["types"],
  1671. "tags": {},
  1672. "tag_keys": [],
  1673. "scopes": ["org:cybertogether"],
  1674. "owner": "agent:slim",
  1675. "source": source,
  1676. "eval": eval_data,
  1677. "created_at": now,
  1678. "updated_at": now,
  1679. "status": "approved",
  1680. })
  1681. pg_store.insert_batch(knowledge_list)
  1682. result_msg = f"瘦身完成:{len(all_knowledge)} → {len(new_entries)} 条知识"
  1683. if report_line:
  1684. result_msg += f"\n{report_line}"
  1685. print(f"[知识瘦身] {result_msg}")
  1686. return {"status": "ok", "before": len(all_knowledge), "after": len(new_entries), "report": report_line}
  1687. except HTTPException:
  1688. raise
  1689. except Exception as e:
  1690. print(f"[Slim Knowledge] 错误: {e}")
  1691. raise HTTPException(status_code=500, detail=str(e))
  1692. @app.post("/api/extract")
  1693. async def extract_knowledge_from_messages(extract_req: MessageExtractIn, background_tasks: BackgroundTasks):
  1694. """从消息历史中提取知识(LLM 分析)"""
  1695. if not extract_req.submitted_by:
  1696. raise HTTPException(status_code=400, detail="submitted_by is required")
  1697. messages = extract_req.messages
  1698. if not messages or len(messages) == 0:
  1699. return {"status": "ok", "extracted_count": 0, "knowledge_ids": []}
  1700. # 构造消息历史文本
  1701. messages_text = ""
  1702. for msg in messages:
  1703. role = msg.get("role", "unknown")
  1704. content = msg.get("content", "")
  1705. messages_text += f"[{role}]: {content}\n\n"
  1706. # LLM 提取知识
  1707. prompt = MESSAGE_EXTRACT_PROMPT_TEMPLATE.format(messages_text=messages_text)
  1708. try:
  1709. print(f"\n[Extract] 正在从 {len(messages)} 条消息中提取知识...")
  1710. response = await _dedup_llm(
  1711. messages=[{"role": "user", "content": prompt}],
  1712. )
  1713. content = response.get("content", "").strip()
  1714. # 尝试解析 JSON
  1715. # 移除可能的 markdown 代码块标记
  1716. if content.startswith("```json"):
  1717. content = content[7:]
  1718. if content.startswith("```"):
  1719. content = content[3:]
  1720. if content.endswith("```"):
  1721. content = content[:-3]
  1722. content = content.strip()
  1723. extracted_knowledge = json.loads(content)
  1724. if not isinstance(extracted_knowledge, list):
  1725. raise ValueError("LLM output is not a list")
  1726. if not extracted_knowledge:
  1727. return {"status": "ok", "extracted_count": 0, "knowledge_ids": []}
  1728. # 批量生成向量(只基于 task)
  1729. texts = [item.get('task', '') for item in extracted_knowledge]
  1730. embeddings = await get_embeddings_batch(texts)
  1731. # 保存提取的知识
  1732. knowledge_ids = []
  1733. now = int(time.time())
  1734. knowledge_list = []
  1735. for item, embedding in zip(extracted_knowledge, embeddings):
  1736. task = item.get("task", "")
  1737. knowledge_content = item.get("content", "")
  1738. types = item.get("types", ["strategy"])
  1739. score = item.get("score", 3)
  1740. if not task or not knowledge_content:
  1741. continue
  1742. # 生成 ID
  1743. timestamp = datetime.now().strftime('%Y%m%d-%H%M%S')
  1744. random_suffix = uuid.uuid4().hex[:4]
  1745. knowledge_id = f"knowledge-{timestamp}-{random_suffix}"
  1746. # 准备数据
  1747. source = {
  1748. "name": "message_extraction",
  1749. "category": "exp",
  1750. "urls": [],
  1751. "agent_id": extract_req.agent_id,
  1752. "submitted_by": extract_req.submitted_by,
  1753. "timestamp": datetime.now(timezone.utc).isoformat(),
  1754. "session_key": extract_req.session_key
  1755. }
  1756. eval_data = {
  1757. "score": score,
  1758. "helpful": 1,
  1759. "harmful": 0,
  1760. "confidence": 0.7,
  1761. "helpful_history": [],
  1762. "harmful_history": []
  1763. }
  1764. knowledge_list.append({
  1765. "id": knowledge_id,
  1766. "task_embedding": embedding,
  1767. "message_id": "",
  1768. "task": task,
  1769. "content": knowledge_content,
  1770. "types": types,
  1771. "tags": {},
  1772. "tag_keys": [],
  1773. "scopes": ["org:cybertogether"],
  1774. "owner": extract_req.submitted_by,
  1775. "source": source,
  1776. "eval": eval_data,
  1777. "created_at": now,
  1778. "updated_at": now,
  1779. "status": "pending",
  1780. })
  1781. knowledge_ids.append(knowledge_id)
  1782. # 批量插入
  1783. if knowledge_list:
  1784. pg_store.insert_batch(knowledge_list)
  1785. background_tasks.add_task(knowledge_processor.process_pending)
  1786. print(f"[Extract] 成功提取并保存 {len(knowledge_ids)} 条知识")
  1787. return {
  1788. "status": "ok",
  1789. "extracted_count": len(knowledge_ids),
  1790. "knowledge_ids": knowledge_ids
  1791. }
  1792. except json.JSONDecodeError as e:
  1793. print(f"[Extract] JSON 解析失败: {e}")
  1794. print(f"[Extract] LLM 输出: {content[:500]}")
  1795. return {"status": "error", "error": "Failed to parse LLM output", "extracted_count": 0}
  1796. except Exception as e:
  1797. print(f"[Extract] 提取失败: {e}")
  1798. return {"status": "error", "error": str(e), "extracted_count": 0}
  1799. # ===== Tool API =====
  1800. @app.post("/api/tool", status_code=201)
  1801. async def create_tool(tool: ToolIn):
  1802. """创建或更新工具"""
  1803. try:
  1804. now = int(time.time())
  1805. embedding = await get_embedding(f"{tool.name} {tool.introduction}")
  1806. pg_tool_store.insert_or_update({
  1807. 'id': tool.id,
  1808. 'name': tool.name,
  1809. 'version': tool.version,
  1810. 'introduction': tool.introduction,
  1811. 'tutorial': tool.tutorial,
  1812. 'input': tool.input,
  1813. 'output': tool.output,
  1814. 'updated_time': now,
  1815. 'status': tool.status,
  1816. 'capability_ids': tool.capability_ids,
  1817. 'knowledge_ids': tool.knowledge_ids,
  1818. 'provider_ids': tool.provider_ids,
  1819. 'embedding': embedding,
  1820. })
  1821. return {"status": "ok", "id": tool.id}
  1822. except Exception as e:
  1823. raise HTTPException(status_code=500, detail=str(e))
  1824. @app.get("/api/tool")
  1825. def list_tools(
  1826. status: Optional[str] = Query(None),
  1827. limit: int = Query(100, ge=1, le=1000),
  1828. offset: int = Query(0, ge=0),
  1829. ):
  1830. """列出工具"""
  1831. try:
  1832. results = pg_tool_store.list_all(limit=limit, offset=offset, status=status)
  1833. total = pg_tool_store.count(status=status)
  1834. return {"results": results, "total": total}
  1835. except Exception as e:
  1836. raise HTTPException(status_code=500, detail=str(e))
  1837. @app.get("/api/tool/search")
  1838. async def search_tools(
  1839. q: str = Query(..., description="查询文本"),
  1840. top_k: int = Query(5, ge=1, le=100),
  1841. status: Optional[str] = None,
  1842. ):
  1843. """向量检索工具"""
  1844. try:
  1845. query_embedding = await get_embedding(q)
  1846. results = pg_tool_store.search(query_embedding, limit=top_k, status=status)
  1847. return {"results": results, "count": len(results)}
  1848. except Exception as e:
  1849. raise HTTPException(status_code=500, detail=str(e))
  1850. @app.get("/api/tool/{tool_id:path}")
  1851. def get_tool(tool_id: str):
  1852. """获取单个工具详情"""
  1853. try:
  1854. result = pg_tool_store.get_by_id(tool_id)
  1855. if not result:
  1856. raise HTTPException(status_code=404, detail=f"Tool not found: {tool_id}")
  1857. return result
  1858. except HTTPException:
  1859. raise
  1860. except Exception as e:
  1861. raise HTTPException(status_code=500, detail=str(e))
  1862. @app.patch("/api/tool/{tool_id:path}")
  1863. async def patch_tool(tool_id: str, patch: ToolPatchIn):
  1864. """更新工具字段"""
  1865. try:
  1866. if not pg_tool_store.get_by_id(tool_id):
  1867. raise HTTPException(status_code=404, detail=f"Tool not found: {tool_id}")
  1868. updates = {}
  1869. need_reembed = False
  1870. for field in ('name', 'version', 'introduction', 'tutorial', 'input', 'output',
  1871. 'status', 'capability_ids', 'knowledge_ids', 'provider_ids'):
  1872. value = getattr(patch, field)
  1873. if value is not None:
  1874. updates[field] = value
  1875. if field in ('name', 'introduction'):
  1876. need_reembed = True
  1877. if not updates:
  1878. return {"status": "ok", "id": tool_id}
  1879. updates['updated_time'] = int(time.time())
  1880. if need_reembed:
  1881. existing = pg_tool_store.get_by_id(tool_id)
  1882. name = updates.get('name', existing['name'])
  1883. intro = updates.get('introduction', existing['introduction'])
  1884. updates['embedding'] = await get_embedding(f"{name} {intro}")
  1885. pg_tool_store.update(tool_id, updates)
  1886. return {"status": "ok", "id": tool_id}
  1887. except HTTPException:
  1888. raise
  1889. except Exception as e:
  1890. raise HTTPException(status_code=500, detail=str(e))
  1891. @app.delete("/api/tool/{tool_id:path}")
  1892. def delete_tool(tool_id: str):
  1893. """删除工具"""
  1894. try:
  1895. if not pg_tool_store.get_by_id(tool_id):
  1896. raise HTTPException(status_code=404, detail=f"Tool not found: {tool_id}")
  1897. pg_tool_store.delete(tool_id)
  1898. return {"status": "ok", "id": tool_id}
  1899. except HTTPException:
  1900. raise
  1901. except Exception as e:
  1902. raise HTTPException(status_code=500, detail=str(e))
  1903. # ===== Capability API =====
  1904. @app.post("/api/capability", status_code=201)
  1905. async def create_capability(cap: CapabilityIn):
  1906. """创建或更新原子能力"""
  1907. try:
  1908. embedding = await get_embedding(f"{cap.name} {cap.description}")
  1909. pg_capability_store.insert_or_update({
  1910. 'id': cap.id,
  1911. 'name': cap.name,
  1912. 'criterion': cap.criterion,
  1913. 'description': cap.description,
  1914. 'requirement_ids': cap.requirement_ids,
  1915. 'implements': cap.implements,
  1916. 'tool_ids': cap.tool_ids,
  1917. 'knowledge_ids': cap.knowledge_ids,
  1918. 'embedding': embedding,
  1919. })
  1920. return {"status": "ok", "id": cap.id}
  1921. except Exception as e:
  1922. raise HTTPException(status_code=500, detail=str(e))
  1923. @app.get("/api/capability")
  1924. def list_capabilities(
  1925. limit: int = Query(100, ge=1, le=1000),
  1926. offset: int = Query(0, ge=0),
  1927. ):
  1928. """列出原子能力"""
  1929. try:
  1930. results = pg_capability_store.list_all(limit=limit, offset=offset)
  1931. total = pg_capability_store.count()
  1932. return {"results": results, "total": total}
  1933. except Exception as e:
  1934. raise HTTPException(status_code=500, detail=str(e))
  1935. @app.get("/api/capability/search")
  1936. async def search_capabilities(
  1937. q: str = Query(..., description="查询文本"),
  1938. top_k: int = Query(5, ge=1, le=100),
  1939. ):
  1940. """向量检索原子能力"""
  1941. try:
  1942. query_embedding = await get_embedding(q)
  1943. results = pg_capability_store.search(query_embedding, limit=top_k)
  1944. return {"results": results, "count": len(results)}
  1945. except Exception as e:
  1946. raise HTTPException(status_code=500, detail=str(e))
  1947. @app.get("/api/capability/{cap_id}")
  1948. def get_capability(cap_id: str):
  1949. """获取单个原子能力"""
  1950. try:
  1951. result = pg_capability_store.get_by_id(cap_id)
  1952. if not result:
  1953. raise HTTPException(status_code=404, detail=f"Capability not found: {cap_id}")
  1954. return result
  1955. except HTTPException:
  1956. raise
  1957. except Exception as e:
  1958. raise HTTPException(status_code=500, detail=str(e))
  1959. @app.patch("/api/capability/{cap_id}")
  1960. async def patch_capability(cap_id: str, patch: CapabilityPatchIn):
  1961. """更新原子能力字段"""
  1962. try:
  1963. existing = pg_capability_store.get_by_id(cap_id)
  1964. if not existing:
  1965. raise HTTPException(status_code=404, detail=f"Capability not found: {cap_id}")
  1966. updates = {}
  1967. need_reembed = False
  1968. for field in ('name', 'criterion', 'description', 'requirement_ids',
  1969. 'implements', 'tool_ids', 'knowledge_ids'):
  1970. value = getattr(patch, field)
  1971. if value is not None:
  1972. updates[field] = value
  1973. if field in ('name', 'description'):
  1974. need_reembed = True
  1975. if not updates:
  1976. return {"status": "ok", "id": cap_id}
  1977. if need_reembed:
  1978. name = updates.get('name', existing['name'])
  1979. desc = updates.get('description', existing['description'])
  1980. updates['embedding'] = await get_embedding(f"{name} {desc}")
  1981. pg_capability_store.update(cap_id, updates)
  1982. return {"status": "ok", "id": cap_id}
  1983. except HTTPException:
  1984. raise
  1985. except Exception as e:
  1986. raise HTTPException(status_code=500, detail=str(e))
  1987. @app.delete("/api/capability/{cap_id}")
  1988. def delete_capability(cap_id: str):
  1989. """删除原子能力"""
  1990. try:
  1991. if not pg_capability_store.get_by_id(cap_id):
  1992. raise HTTPException(status_code=404, detail=f"Capability not found: {cap_id}")
  1993. pg_capability_store.delete(cap_id)
  1994. return {"status": "ok", "id": cap_id}
  1995. except HTTPException:
  1996. raise
  1997. except Exception as e:
  1998. raise HTTPException(status_code=500, detail=str(e))
  1999. # ===== Requirement API =====
  2000. @app.post("/api/requirement", status_code=201)
  2001. async def create_requirement(req: RequirementIn):
  2002. """创建或更新需求"""
  2003. try:
  2004. embedding = await get_embedding(req.description)
  2005. pg_requirement_store.insert_or_update({
  2006. 'id': req.id,
  2007. 'description': req.description,
  2008. 'capability_ids': req.capability_ids,
  2009. 'knowledge_ids': req.knowledge_ids,
  2010. 'source_nodes': req.source_nodes,
  2011. 'status': req.status,
  2012. 'match_result': req.match_result,
  2013. 'embedding': embedding,
  2014. })
  2015. return {"status": "ok", "id": req.id}
  2016. except Exception as e:
  2017. raise HTTPException(status_code=500, detail=str(e))
  2018. @app.get("/api/requirement")
  2019. def list_requirements(
  2020. status: Optional[str] = Query(None),
  2021. limit: int = Query(100, ge=1, le=1000),
  2022. offset: int = Query(0, ge=0),
  2023. ):
  2024. """列出需求"""
  2025. try:
  2026. results = pg_requirement_store.list_all(limit=limit, offset=offset, status=status)
  2027. total = pg_requirement_store.count(status=status)
  2028. return {"results": results, "total": total}
  2029. except Exception as e:
  2030. raise HTTPException(status_code=500, detail=str(e))
  2031. @app.get("/api/requirement/search")
  2032. async def search_requirements(
  2033. q: str = Query(..., description="查询文本"),
  2034. top_k: int = Query(5, ge=1, le=100),
  2035. ):
  2036. """向量检索需求"""
  2037. try:
  2038. query_embedding = await get_embedding(q)
  2039. results = pg_requirement_store.search(query_embedding, limit=top_k)
  2040. return {"results": results, "count": len(results)}
  2041. except Exception as e:
  2042. raise HTTPException(status_code=500, detail=str(e))
  2043. @app.get("/api/requirement/{req_id}")
  2044. def get_requirement(req_id: str):
  2045. """获取单个需求"""
  2046. try:
  2047. result = pg_requirement_store.get_by_id(req_id)
  2048. if not result:
  2049. raise HTTPException(status_code=404, detail=f"Requirement not found: {req_id}")
  2050. return result
  2051. except HTTPException:
  2052. raise
  2053. except Exception as e:
  2054. raise HTTPException(status_code=500, detail=str(e))
  2055. @app.patch("/api/requirement/{req_id}")
  2056. async def patch_requirement(req_id: str, patch: RequirementPatchIn):
  2057. """更新需求字段"""
  2058. try:
  2059. existing = pg_requirement_store.get_by_id(req_id)
  2060. if not existing:
  2061. raise HTTPException(status_code=404, detail=f"Requirement not found: {req_id}")
  2062. updates = {}
  2063. need_reembed = False
  2064. for field in ('description', 'capability_ids', 'knowledge_ids', 'source_nodes', 'status', 'match_result'):
  2065. value = getattr(patch, field)
  2066. if value is not None:
  2067. updates[field] = value
  2068. if field == 'description':
  2069. need_reembed = True
  2070. if not updates:
  2071. return {"status": "ok", "id": req_id}
  2072. if need_reembed:
  2073. updates['embedding'] = await get_embedding(updates['description'])
  2074. pg_requirement_store.update(req_id, updates)
  2075. return {"status": "ok", "id": req_id}
  2076. except HTTPException:
  2077. raise
  2078. except Exception as e:
  2079. raise HTTPException(status_code=500, detail=str(e))
  2080. @app.delete("/api/requirement/{req_id}")
  2081. def delete_requirement(req_id: str):
  2082. """删除需求"""
  2083. try:
  2084. if not pg_requirement_store.get_by_id(req_id):
  2085. raise HTTPException(status_code=404, detail=f"Requirement not found: {req_id}")
  2086. pg_requirement_store.delete(req_id)
  2087. return {"status": "ok", "id": req_id}
  2088. except HTTPException:
  2089. raise
  2090. except Exception as e:
  2091. raise HTTPException(status_code=500, detail=str(e))
  2092. @app.post("/api/pattern/posts/batch")
  2093. async def proxy_pattern_posts_batch(payload: PostBatchRequest):
  2094. """代理帖子批量查询,避免前端直接请求外部域名失败后静默回退为纯 ID。"""
  2095. post_ids = [pid for pid in payload.post_ids if pid]
  2096. if not post_ids:
  2097. return {"success": True, "posts": []}
  2098. try:
  2099. async with httpx.AsyncClient(timeout=30.0) as client:
  2100. resp = await client.post(
  2101. "https://pattern.aiddit.com/api/pattern/posts/batch",
  2102. json={"post_ids": post_ids},
  2103. )
  2104. resp.raise_for_status()
  2105. return resp.json()
  2106. except httpx.HTTPStatusError as e:
  2107. raise HTTPException(status_code=e.response.status_code, detail="Pattern posts API returned an error")
  2108. except Exception as e:
  2109. raise HTTPException(status_code=502, detail=f"Failed to fetch pattern posts: {e}")
  2110. @app.get("/api/pattern/itemsets")
  2111. async def proxy_pattern_itemsets(execution_id: int):
  2112. """代理获取 itemsets,避免前端直接请求外部域名"""
  2113. try:
  2114. async with httpx.AsyncClient(timeout=30.0) as client:
  2115. resp = await client.get(
  2116. f"https://pattern.aiddit.com/api/pattern/itemsets?execution_id={execution_id}&page_size=1000",
  2117. )
  2118. resp.raise_for_status()
  2119. return resp.json()
  2120. except httpx.HTTPStatusError as e:
  2121. raise HTTPException(status_code=e.response.status_code, detail="Pattern itemsets API returned an error")
  2122. except Exception as e:
  2123. raise HTTPException(status_code=502, detail=f"Failed to fetch pattern itemsets: {e}")
  2124. @app.get("/")
  2125. def frontend():
  2126. """KnowHub 管理前端"""
  2127. index_file = STATIC_DIR / "index.html"
  2128. if not index_file.exists():
  2129. return HTMLResponse("<h1>KnowHub Frontend Not Found</h1><p>Please ensure knowhub/frontend/dist/index.html exists. Run 'yarn build' in frontend directory.</p>", status_code=404)
  2130. return FileResponse(str(index_file))
  2131. # ===== Strategy API =====
  2132. @app.post("/api/strategy", status_code=201)
  2133. async def submit_strategy(strategy: StrategyIn):
  2134. """创建或更新策略(自动填时间戳 + name/description 向量)"""
  2135. try:
  2136. now = int(time.time())
  2137. data = strategy.model_dump()
  2138. data['created_at'] = data.get('created_at') or now
  2139. data['updated_at'] = now
  2140. data['embedding'] = await get_embedding(f"{strategy.name} {strategy.description}")
  2141. pg_strategy_store.insert_or_update(data)
  2142. return {"success": True, "id": strategy.id}
  2143. except Exception as e:
  2144. raise HTTPException(status_code=500, detail=str(e))
  2145. @app.get("/api/strategy")
  2146. def get_strategies(limit: int = 100, offset: int = 0, status: Optional[str] = None):
  2147. try:
  2148. results = pg_strategy_store.list_all(limit=limit, offset=offset, status=status)
  2149. total = pg_strategy_store.count(status=status)
  2150. return {"strategies": results, "total": total}
  2151. except Exception as e:
  2152. raise HTTPException(status_code=500, detail=str(e))
  2153. @app.get("/api/strategy/search")
  2154. async def search_strategies(q: str = Query(...), top_k: int = 20, status: Optional[str] = None):
  2155. try:
  2156. query_embedding = await get_embedding(q)
  2157. results = pg_strategy_store.search(query_embedding, limit=top_k, status=status)
  2158. return {"results": results, "count": len(results)}
  2159. except Exception as e:
  2160. raise HTTPException(status_code=500, detail=str(e))
  2161. @app.get("/api/strategy/{strategy_id:path}")
  2162. def get_strategy(strategy_id: str):
  2163. try:
  2164. result = pg_strategy_store.get_by_id(strategy_id)
  2165. if not result:
  2166. raise HTTPException(status_code=404, detail="Strategy not found")
  2167. return result
  2168. except HTTPException:
  2169. raise
  2170. except Exception as e:
  2171. raise HTTPException(status_code=500, detail=str(e))
  2172. @app.patch("/api/strategy/{strategy_id:path}")
  2173. async def patch_strategy(strategy_id: str, updates: StrategyPatchIn):
  2174. """更新策略字段。若改了 name/description,会重算向量。"""
  2175. try:
  2176. existing = pg_strategy_store.get_by_id(strategy_id)
  2177. if not existing:
  2178. raise HTTPException(status_code=404, detail="Strategy not found")
  2179. update_dict = updates.model_dump(exclude_unset=True)
  2180. if not update_dict:
  2181. return {"success": True}
  2182. if 'name' in update_dict or 'description' in update_dict:
  2183. name = update_dict.get('name', existing.get('name', ''))
  2184. desc = update_dict.get('description', existing.get('description', ''))
  2185. update_dict['embedding'] = await get_embedding(f"{name} {desc}")
  2186. update_dict['updated_at'] = int(time.time())
  2187. pg_strategy_store.update(strategy_id, update_dict)
  2188. return {"success": True}
  2189. except HTTPException:
  2190. raise
  2191. except Exception as e:
  2192. raise HTTPException(status_code=500, detail=str(e))
  2193. @app.delete("/api/strategy/{strategy_id:path}")
  2194. def delete_strategy(strategy_id: str):
  2195. try:
  2196. if not pg_strategy_store.get_by_id(strategy_id):
  2197. raise HTTPException(status_code=404, detail="Strategy not found")
  2198. pg_strategy_store.delete(strategy_id)
  2199. return {"success": True}
  2200. except HTTPException:
  2201. raise
  2202. except Exception as e:
  2203. raise HTTPException(status_code=500, detail=str(e))
  2204. # ===== Relation API =====
  2205. @app.get("/api/relation/{table_name}")
  2206. async def get_relations(table_name: str, request: Request):
  2207. """通用关系表查询接口"""
  2208. allowed_tables = {
  2209. "capability_knowledge",
  2210. "capability_tool",
  2211. "knowledge_relation",
  2212. "knowledge_resource",
  2213. "requirement_capability",
  2214. "requirement_knowledge",
  2215. "tool_knowledge",
  2216. "tool_provider"
  2217. }
  2218. table_name = table_name.lower()
  2219. if table_name not in allowed_tables:
  2220. raise HTTPException(status_code=400, detail="Invalid table name")
  2221. try:
  2222. params = dict(request.query_params)
  2223. where_clauses = []
  2224. values = []
  2225. for k, v in params.items():
  2226. if k in ["limit", "offset"]: continue
  2227. where_clauses.append(f"{k} = %s")
  2228. values.append(v)
  2229. query = f"SELECT * FROM {table_name}"
  2230. if where_clauses:
  2231. query += " WHERE " + " AND ".join(where_clauses)
  2232. limit = int(params.get("limit", 100))
  2233. query += " LIMIT %s"
  2234. values.append(limit)
  2235. cursor = pg_store._get_cursor()
  2236. try:
  2237. cursor.execute(query, tuple(values))
  2238. rows = cursor.fetchall()
  2239. if not rows:
  2240. return {"results": [], "count": 0}
  2241. colnames = [desc[0] for desc in cursor.description]
  2242. results = [dict(zip(colnames, row)) for row in rows]
  2243. return {"results": results, "count": len(results)}
  2244. finally:
  2245. cursor.close()
  2246. except Exception as e:
  2247. raise HTTPException(status_code=500, detail=str(e))
# --- Dashboard Snapshot (cached aggregation endpoint) ---
# In-process cache for /api/dashboard/snapshot, bucketed by data version string.
_dashboard_snapshot_cache: Dict[str, dict] = {}  # version -> snapshot payload
_dashboard_snapshot_ts: Dict[str, float] = {}  # version -> epoch seconds when the snapshot was built
_DASHBOARD_CACHE_TTL = 24 * 3600  # cache lifetime: 24 hours
  2252. def _invalidate_dashboard_cache():
  2253. """数据写入后调用,清除 dashboard 快照缓存(所有版本)"""
  2254. _dashboard_snapshot_cache.clear()
  2255. _dashboard_snapshot_ts.clear()
  2256. def _build_dashboard_snapshot() -> dict:
  2257. """在后端一次性构建 Dashboard 所需的全部数据"""
  2258. # 兼容三种部署形态:production build / dev (vite public) / 仓库 static
  2259. _PROJECT_ROOT = Path(__file__).parent
  2260. tree_candidates = [
  2261. STATIC_DIR / "category_tree.json",
  2262. _PROJECT_ROOT / "frontend" / "public" / "category_tree.json",
  2263. _PROJECT_ROOT / "static" / "category_tree.json",
  2264. ]
  2265. tree_data = None
  2266. for tree_file in tree_candidates:
  2267. if tree_file.exists():
  2268. tree_data = json.loads(tree_file.read_text(encoding="utf-8"))
  2269. break
  2270. if tree_data is None:
  2271. raise HTTPException(
  2272. status_code=500,
  2273. detail=f"category_tree.json not found in any of: {[str(p) for p in tree_candidates]}",
  2274. )
  2275. reqs = pg_requirement_store.list_all(limit=1000, offset=0)
  2276. caps = pg_capability_store.list_all(limit=1000, offset=0)
  2277. tools = pg_tool_store.list_all(limit=1000, offset=0)
  2278. procs = pg_strategy_store.list_all(limit=1000, offset=0)
  2279. know_raw = pg_store.query('(status == "approved" or status == "checked")', limit=1000)
  2280. know = [to_serializable(r) for r in know_raw]
  2281. return {
  2282. "tree": tree_data,
  2283. "reqs": reqs,
  2284. "caps": caps,
  2285. "tools": tools,
  2286. "procs": procs,
  2287. "know": know,
  2288. "built_at": time.time(),
  2289. }
  2290. @app.get("/api/dashboard/snapshot")
  2291. def get_dashboard_snapshot():
  2292. """返回 Dashboard 所需的全部数据快照,带服务端内存缓存(24h TTL,写入时失效)。按 version 分桶。"""
  2293. from knowhub.knowhub_db.version_context import get_version
  2294. v = get_version()
  2295. now = time.time()
  2296. cached = _dashboard_snapshot_cache.get(v)
  2297. ts = _dashboard_snapshot_ts.get(v, 0)
  2298. if cached and (now - ts < _DASHBOARD_CACHE_TTL):
  2299. return cached
  2300. try:
  2301. snap = _build_dashboard_snapshot()
  2302. snap['version'] = v
  2303. _dashboard_snapshot_cache[v] = snap
  2304. _dashboard_snapshot_ts[v] = now
  2305. return snap
  2306. except Exception as e:
  2307. raise HTTPException(status_code=500, detail=str(e))
  2308. @app.post("/api/dashboard/invalidate")
  2309. def invalidate_dashboard_cache():
  2310. """手动清除 dashboard 缓存"""
  2311. _invalidate_dashboard_cache()
  2312. return {"status": "ok"}
  2313. @app.get("/category_tree.json")
  2314. def serve_category_tree():
  2315. """类别树JSON数据"""
  2316. tree_file = STATIC_DIR / "category_tree.json"
  2317. if not tree_file.exists():
  2318. return {"error": "Not Found"}
  2319. return FileResponse(str(tree_file))
  2320. @app.get("/{frontend_path:path}")
  2321. def frontend_spa_fallback(frontend_path: str):
  2322. """SPA 路由兜底:将非 API 的前端子路径回退到 index.html,由 React Router 处理。"""
  2323. if frontend_path.startswith("api/") or frontend_path.startswith("assets/"):
  2324. raise HTTPException(status_code=404, detail="Not Found")
  2325. # 带扩展名的路径尝试作为静态文件直接返回,不存在则抛 404
  2326. if "." in Path(frontend_path).name:
  2327. full_path = STATIC_DIR / frontend_path
  2328. if full_path.exists() and full_path.is_file():
  2329. return FileResponse(str(full_path))
  2330. raise HTTPException(status_code=404, detail="Not Found")
  2331. index_file = STATIC_DIR / "index.html"
  2332. if not index_file.exists():
  2333. return HTMLResponse("<h1>KnowHub Frontend Not Found</h1><p>Please ensure knowhub/frontend/dist/index.html exists. Run 'yarn build' in frontend directory.</p>", status_code=404)
  2334. return FileResponse(str(index_file))
if __name__ == "__main__":
    # Direct-run entry point: serve the ASGI app on all interfaces, port 9999.
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=9999)