|
@@ -77,25 +77,22 @@ def init_db():
|
|
|
CREATE TABLE IF NOT EXISTS knowledge (
|
|
CREATE TABLE IF NOT EXISTS knowledge (
|
|
|
id TEXT PRIMARY KEY,
|
|
id TEXT PRIMARY KEY,
|
|
|
message_id TEXT DEFAULT '',
|
|
message_id TEXT DEFAULT '',
|
|
|
- tags_type TEXT NOT NULL,
|
|
|
|
|
- scenario TEXT NOT NULL,
|
|
|
|
|
|
|
+ types TEXT NOT NULL, -- JSON array: ["strategy", "tool"]
|
|
|
|
|
+ task TEXT NOT NULL,
|
|
|
|
|
+ tags TEXT DEFAULT '{}', -- JSON object: {"category": "...", "domain": "..."}
|
|
|
|
|
+ scopes TEXT DEFAULT '["org:cybertogether"]', -- JSON array
|
|
|
|
|
+ owner TEXT DEFAULT '',
|
|
|
content TEXT NOT NULL,
|
|
content TEXT NOT NULL,
|
|
|
- source_urls TEXT DEFAULT '',
|
|
|
|
|
- source_agent_id TEXT DEFAULT '',
|
|
|
|
|
- source_timestamp TEXT NOT NULL,
|
|
|
|
|
- eval_score INTEGER DEFAULT 3 CHECK(eval_score BETWEEN 1 AND 5),
|
|
|
|
|
- eval_helpful INTEGER DEFAULT 0,
|
|
|
|
|
- eval_harmful INTEGER DEFAULT 0,
|
|
|
|
|
- eval_helpful_history TEXT DEFAULT '[]',
|
|
|
|
|
- eval_harmful_history TEXT DEFAULT '[]',
|
|
|
|
|
- metrics_helpful INTEGER DEFAULT 1,
|
|
|
|
|
- metrics_harmful INTEGER DEFAULT 0,
|
|
|
|
|
|
|
+ source TEXT DEFAULT '{}', -- JSON object: {name, category, urls, agent_id, submitted_by, timestamp}
|
|
|
|
|
+ eval TEXT DEFAULT '{}', -- JSON object: {score, helpful, harmful, confidence, histories}
|
|
|
created_at TEXT NOT NULL,
|
|
created_at TEXT NOT NULL,
|
|
|
updated_at TEXT DEFAULT ''
|
|
updated_at TEXT DEFAULT ''
|
|
|
)
|
|
)
|
|
|
""")
|
|
""")
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_tags ON knowledge(tags_type)")
|
|
|
|
|
- conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_scenario ON knowledge(scenario)")
|
|
|
|
|
|
|
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_types ON knowledge(types)")
|
|
|
|
|
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_task ON knowledge(task)")
|
|
|
|
|
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_owner ON knowledge(owner)")
|
|
|
|
|
+ conn.execute("CREATE INDEX IF NOT EXISTS idx_knowledge_scopes ON knowledge(scopes)")
|
|
|
|
|
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
conn.close()
|
|
conn.close()
|
|
@@ -156,24 +153,28 @@ class ContentIn(BaseModel):
|
|
|
|
|
|
|
|
# Knowledge Models
|
|
# Knowledge Models
|
|
|
class KnowledgeIn(BaseModel):
|
|
class KnowledgeIn(BaseModel):
|
|
|
- scenario: str
|
|
|
|
|
|
|
+ task: str
|
|
|
content: str
|
|
content: str
|
|
|
- tags_type: list[str]
|
|
|
|
|
- urls: list[str] = []
|
|
|
|
|
- agent_id: str = "research_agent"
|
|
|
|
|
- score: int = Field(default=3, ge=1, le=5)
|
|
|
|
|
|
|
+ types: list[str] = ["strategy"]
|
|
|
|
|
+ tags: dict = {}
|
|
|
|
|
+ scopes: list[str] = ["org:cybertogether"]
|
|
|
|
|
+ owner: str = ""
|
|
|
message_id: str = ""
|
|
message_id: str = ""
|
|
|
|
|
+ source: dict = {} # {name, category, urls, agent_id, submitted_by, timestamp}
|
|
|
|
|
+ eval: dict = {} # {score, helpful, harmful, confidence}
|
|
|
|
|
|
|
|
|
|
|
|
|
class KnowledgeOut(BaseModel):
|
|
class KnowledgeOut(BaseModel):
|
|
|
id: str
|
|
id: str
|
|
|
message_id: str
|
|
message_id: str
|
|
|
|
|
+ types: list[str]
|
|
|
|
|
+ task: str
|
|
|
tags: dict
|
|
tags: dict
|
|
|
- scenario: str
|
|
|
|
|
|
|
+ scopes: list[str]
|
|
|
|
|
+ owner: str
|
|
|
content: str
|
|
content: str
|
|
|
source: dict
|
|
source: dict
|
|
|
eval: dict
|
|
eval: dict
|
|
|
- metrics: dict
|
|
|
|
|
created_at: str
|
|
created_at: str
|
|
|
updated_at: str
|
|
updated_at: str
|
|
|
|
|
|
|
@@ -448,8 +449,8 @@ async def _route_knowledge_by_llm(query_text: str, metadata_list: list[dict], k:
|
|
|
routing_data = [
|
|
routing_data = [
|
|
|
{
|
|
{
|
|
|
"id": m["id"],
|
|
"id": m["id"],
|
|
|
- "tags": m["tags"],
|
|
|
|
|
- "scenario": m["scenario"][:100]
|
|
|
|
|
|
|
+ "types": m["types"],
|
|
|
|
|
+ "task": m["task"][:100]
|
|
|
} for m in metadata_list
|
|
} for m in metadata_list
|
|
|
]
|
|
]
|
|
|
|
|
|
|
@@ -485,7 +486,7 @@ async def _search_knowledge_two_stage(
|
|
|
query_text: str,
|
|
query_text: str,
|
|
|
top_k: int = 5,
|
|
top_k: int = 5,
|
|
|
min_score: int = 3,
|
|
min_score: int = 3,
|
|
|
- tags_filter: Optional[list[str]] = None,
|
|
|
|
|
|
|
+ types_filter: Optional[list[str]] = None,
|
|
|
conn: sqlite3.Connection = None
|
|
conn: sqlite3.Connection = None
|
|
|
) -> list[dict]:
|
|
) -> list[dict]:
|
|
|
"""
|
|
"""
|
|
@@ -510,38 +511,40 @@ async def _search_knowledge_two_stage(
|
|
|
|
|
|
|
|
for row in rows:
|
|
for row in rows:
|
|
|
kid = row["id"]
|
|
kid = row["id"]
|
|
|
- tags_type = row["tags_type"].split(",") if row["tags_type"] else []
|
|
|
|
|
|
|
+ types = json.loads(row["types"])
|
|
|
|
|
|
|
|
# 标签过滤
|
|
# 标签过滤
|
|
|
- if tags_filter:
|
|
|
|
|
- if not any(tag in tags_type for tag in tags_filter):
|
|
|
|
|
|
|
+ if types_filter:
|
|
|
|
|
+ if not any(t in types for t in types_filter):
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
- scenario = row["scenario"]
|
|
|
|
|
|
|
+ task = row["task"]
|
|
|
content_text = row["content"]
|
|
content_text = row["content"]
|
|
|
|
|
+ eval_data = json.loads(row["eval"])
|
|
|
|
|
+ source = json.loads(row["source"])
|
|
|
|
|
|
|
|
meta_item = {
|
|
meta_item = {
|
|
|
"id": kid,
|
|
"id": kid,
|
|
|
- "tags": {"type": tags_type},
|
|
|
|
|
- "scenario": scenario,
|
|
|
|
|
- "score": row["eval_score"],
|
|
|
|
|
- "helpful": row["metrics_helpful"],
|
|
|
|
|
- "harmful": row["metrics_harmful"],
|
|
|
|
|
|
|
+ "types": types,
|
|
|
|
|
+ "task": task,
|
|
|
|
|
+ "score": eval_data.get("score", 3),
|
|
|
|
|
+ "helpful": eval_data.get("helpful", 0),
|
|
|
|
|
+ "harmful": eval_data.get("harmful", 0),
|
|
|
}
|
|
}
|
|
|
metadata_list.append(meta_item)
|
|
metadata_list.append(meta_item)
|
|
|
content_map[kid] = {
|
|
content_map[kid] = {
|
|
|
- "scenario": scenario,
|
|
|
|
|
|
|
+ "task": task,
|
|
|
"content": content_text,
|
|
"content": content_text,
|
|
|
- "tags": {"type": tags_type},
|
|
|
|
|
|
|
+ "types": types,
|
|
|
|
|
+ "tags": json.loads(row["tags"]),
|
|
|
|
|
+ "scopes": json.loads(row["scopes"]),
|
|
|
|
|
+ "owner": row["owner"],
|
|
|
"score": meta_item["score"],
|
|
"score": meta_item["score"],
|
|
|
"helpful": meta_item["helpful"],
|
|
"helpful": meta_item["helpful"],
|
|
|
"harmful": meta_item["harmful"],
|
|
"harmful": meta_item["harmful"],
|
|
|
"message_id": row["message_id"],
|
|
"message_id": row["message_id"],
|
|
|
- "source": {
|
|
|
|
|
- "urls": row["source_urls"].split(",") if row["source_urls"] else [],
|
|
|
|
|
- "agent_id": row["source_agent_id"],
|
|
|
|
|
- "timestamp": row["source_timestamp"]
|
|
|
|
|
- },
|
|
|
|
|
|
|
+ "source": source,
|
|
|
|
|
+ "eval": eval_data,
|
|
|
"created_at": row["created_at"],
|
|
"created_at": row["created_at"],
|
|
|
"updated_at": row["updated_at"]
|
|
"updated_at": row["updated_at"]
|
|
|
}
|
|
}
|
|
@@ -574,16 +577,15 @@ async def _search_knowledge_two_stage(
|
|
|
scored_items.append({
|
|
scored_items.append({
|
|
|
"id": kid,
|
|
"id": kid,
|
|
|
"message_id": item["message_id"],
|
|
"message_id": item["message_id"],
|
|
|
- "scenario": item["scenario"],
|
|
|
|
|
- "content": item["content"],
|
|
|
|
|
|
|
+ "types": item["types"],
|
|
|
|
|
+ "task": item["task"],
|
|
|
"tags": item["tags"],
|
|
"tags": item["tags"],
|
|
|
- "score": score,
|
|
|
|
|
- "quality_score": quality_score,
|
|
|
|
|
- "metrics": {
|
|
|
|
|
- "helpful": helpful,
|
|
|
|
|
- "harmful": harmful
|
|
|
|
|
- },
|
|
|
|
|
|
|
+ "scopes": item["scopes"],
|
|
|
|
|
+ "owner": item["owner"],
|
|
|
|
|
+ "content": item["content"],
|
|
|
"source": item["source"],
|
|
"source": item["source"],
|
|
|
|
|
+ "eval": item["eval"],
|
|
|
|
|
+ "quality_score": quality_score,
|
|
|
"created_at": item["created_at"],
|
|
"created_at": item["created_at"],
|
|
|
"updated_at": item["updated_at"]
|
|
"updated_at": item["updated_at"]
|
|
|
})
|
|
})
|
|
@@ -608,18 +610,18 @@ async def search_knowledge_api(
|
|
|
q: str = Query(..., description="查询文本"),
|
|
q: str = Query(..., description="查询文本"),
|
|
|
top_k: int = Query(default=5, ge=1, le=20),
|
|
top_k: int = Query(default=5, ge=1, le=20),
|
|
|
min_score: int = Query(default=3, ge=1, le=5),
|
|
min_score: int = Query(default=3, ge=1, le=5),
|
|
|
- tags_type: Optional[str] = None
|
|
|
|
|
|
|
+ types: Optional[str] = None
|
|
|
):
|
|
):
|
|
|
"""检索知识(两阶段:语义路由 + 质量精排)"""
|
|
"""检索知识(两阶段:语义路由 + 质量精排)"""
|
|
|
conn = get_db()
|
|
conn = get_db()
|
|
|
try:
|
|
try:
|
|
|
- tags_filter = tags_type.split(",") if tags_type else None
|
|
|
|
|
|
|
+ types_filter = types.split(",") if types else None
|
|
|
|
|
|
|
|
results = await _search_knowledge_two_stage(
|
|
results = await _search_knowledge_two_stage(
|
|
|
query_text=q,
|
|
query_text=q,
|
|
|
top_k=top_k,
|
|
top_k=top_k,
|
|
|
min_score=min_score,
|
|
min_score=min_score,
|
|
|
- tags_filter=tags_filter,
|
|
|
|
|
|
|
+ types_filter=types_filter,
|
|
|
conn=conn
|
|
conn=conn
|
|
|
)
|
|
)
|
|
|
|
|
|
|
@@ -641,30 +643,46 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
|
|
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
|
|
|
|
|
|
|
+ # 设置默认值
|
|
|
|
|
+ owner = knowledge.owner or f"agent:{knowledge.source.get('agent_id', 'unknown')}"
|
|
|
|
|
+
|
|
|
|
|
+ # 准备 source
|
|
|
|
|
+ source = {
|
|
|
|
|
+ "name": knowledge.source.get("name", ""),
|
|
|
|
|
+ "category": knowledge.source.get("category", ""),
|
|
|
|
|
+ "urls": knowledge.source.get("urls", []),
|
|
|
|
|
+ "agent_id": knowledge.source.get("agent_id", "unknown"),
|
|
|
|
|
+ "submitted_by": knowledge.source.get("submitted_by", ""),
|
|
|
|
|
+ "timestamp": now,
|
|
|
|
|
+ "message_id": knowledge.message_id
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ # 准备 eval
|
|
|
|
|
+ eval_data = {
|
|
|
|
|
+ "score": knowledge.eval.get("score", 3),
|
|
|
|
|
+ "helpful": knowledge.eval.get("helpful", 1),
|
|
|
|
|
+ "harmful": knowledge.eval.get("harmful", 0),
|
|
|
|
|
+ "confidence": knowledge.eval.get("confidence", 0.5),
|
|
|
|
|
+ "helpful_history": [],
|
|
|
|
|
+ "harmful_history": []
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
conn.execute(
|
|
conn.execute(
|
|
|
"""INSERT INTO knowledge
|
|
"""INSERT INTO knowledge
|
|
|
- (id, message_id, tags_type, scenario, content,
|
|
|
|
|
- source_urls, source_agent_id, source_timestamp,
|
|
|
|
|
- eval_score, eval_helpful, eval_harmful,
|
|
|
|
|
- eval_helpful_history, eval_harmful_history,
|
|
|
|
|
- metrics_helpful, metrics_harmful, created_at, updated_at)
|
|
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
|
|
+ (id, message_id, types, task, tags, scopes, owner, content,
|
|
|
|
|
+ source, eval, created_at, updated_at)
|
|
|
|
|
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
(
|
|
(
|
|
|
knowledge_id,
|
|
knowledge_id,
|
|
|
knowledge.message_id,
|
|
knowledge.message_id,
|
|
|
- ",".join(knowledge.tags_type),
|
|
|
|
|
- knowledge.scenario,
|
|
|
|
|
|
|
+ json.dumps(knowledge.types),
|
|
|
|
|
+ knowledge.task,
|
|
|
|
|
+ json.dumps(knowledge.tags),
|
|
|
|
|
+ json.dumps(knowledge.scopes),
|
|
|
|
|
+ owner,
|
|
|
knowledge.content,
|
|
knowledge.content,
|
|
|
- ",".join(knowledge.urls),
|
|
|
|
|
- knowledge.agent_id,
|
|
|
|
|
- now,
|
|
|
|
|
- knowledge.score,
|
|
|
|
|
- 0, # eval_helpful
|
|
|
|
|
- 0, # eval_harmful
|
|
|
|
|
- "[]", # eval_helpful_history
|
|
|
|
|
- "[]", # eval_harmful_history
|
|
|
|
|
- 1, # metrics_helpful
|
|
|
|
|
- 0, # metrics_harmful
|
|
|
|
|
|
|
+ json.dumps(source),
|
|
|
|
|
+ json.dumps(eval_data),
|
|
|
now,
|
|
now,
|
|
|
now,
|
|
now,
|
|
|
),
|
|
),
|
|
@@ -678,17 +696,26 @@ def save_knowledge(knowledge: KnowledgeIn):
|
|
|
@app.get("/api/knowledge")
|
|
@app.get("/api/knowledge")
|
|
|
def list_knowledge(
|
|
def list_knowledge(
|
|
|
limit: int = Query(default=10, ge=1, le=100),
|
|
limit: int = Query(default=10, ge=1, le=100),
|
|
|
- tags_type: Optional[str] = None
|
|
|
|
|
|
|
+ types: Optional[str] = None,
|
|
|
|
|
+ scopes: Optional[str] = None
|
|
|
):
|
|
):
|
|
|
"""列出知识"""
|
|
"""列出知识"""
|
|
|
conn = get_db()
|
|
conn = get_db()
|
|
|
try:
|
|
try:
|
|
|
query = "SELECT * FROM knowledge"
|
|
query = "SELECT * FROM knowledge"
|
|
|
params = []
|
|
params = []
|
|
|
|
|
+ conditions = []
|
|
|
|
|
+
|
|
|
|
|
+ if types:
|
|
|
|
|
+ conditions.append("types LIKE ?")
|
|
|
|
|
+ params.append(f"%{types}%")
|
|
|
|
|
|
|
|
- if tags_type:
|
|
|
|
|
- query += " WHERE tags_type LIKE ?"
|
|
|
|
|
- params.append(f"%{tags_type}%")
|
|
|
|
|
|
|
+ if scopes:
|
|
|
|
|
+ conditions.append("scopes LIKE ?")
|
|
|
|
|
+ params.append(f"%{scopes}%")
|
|
|
|
|
+
|
|
|
|
|
+ if conditions:
|
|
|
|
|
+ query += " WHERE " + " AND ".join(conditions)
|
|
|
|
|
|
|
|
query += " ORDER BY created_at DESC LIMIT ?"
|
|
query += " ORDER BY created_at DESC LIMIT ?"
|
|
|
params.append(limit)
|
|
params.append(limit)
|
|
@@ -700,23 +727,14 @@ def list_knowledge(
|
|
|
results.append({
|
|
results.append({
|
|
|
"id": row["id"],
|
|
"id": row["id"],
|
|
|
"message_id": row["message_id"],
|
|
"message_id": row["message_id"],
|
|
|
- "tags": {"type": row["tags_type"].split(",") if row["tags_type"] else []},
|
|
|
|
|
- "scenario": row["scenario"],
|
|
|
|
|
|
|
+ "types": json.loads(row["types"]),
|
|
|
|
|
+ "task": row["task"],
|
|
|
|
|
+ "tags": json.loads(row["tags"]),
|
|
|
|
|
+ "scopes": json.loads(row["scopes"]),
|
|
|
|
|
+ "owner": row["owner"],
|
|
|
"content": row["content"],
|
|
"content": row["content"],
|
|
|
- "source": {
|
|
|
|
|
- "urls": row["source_urls"].split(",") if row["source_urls"] else [],
|
|
|
|
|
- "agent_id": row["source_agent_id"],
|
|
|
|
|
- "timestamp": row["source_timestamp"]
|
|
|
|
|
- },
|
|
|
|
|
- "eval": {
|
|
|
|
|
- "score": row["eval_score"],
|
|
|
|
|
- "helpful": row["eval_helpful"],
|
|
|
|
|
- "harmful": row["eval_harmful"]
|
|
|
|
|
- },
|
|
|
|
|
- "metrics": {
|
|
|
|
|
- "helpful": row["metrics_helpful"],
|
|
|
|
|
- "harmful": row["metrics_harmful"]
|
|
|
|
|
- },
|
|
|
|
|
|
|
+ "source": json.loads(row["source"]),
|
|
|
|
|
+ "eval": json.loads(row["eval"]),
|
|
|
"created_at": row["created_at"],
|
|
"created_at": row["created_at"],
|
|
|
"updated_at": row["updated_at"]
|
|
"updated_at": row["updated_at"]
|
|
|
})
|
|
})
|
|
@@ -742,25 +760,14 @@ def get_knowledge(knowledge_id: str):
|
|
|
return {
|
|
return {
|
|
|
"id": row["id"],
|
|
"id": row["id"],
|
|
|
"message_id": row["message_id"],
|
|
"message_id": row["message_id"],
|
|
|
- "tags": {"type": row["tags_type"].split(",") if row["tags_type"] else []},
|
|
|
|
|
- "scenario": row["scenario"],
|
|
|
|
|
|
|
+ "types": json.loads(row["types"]),
|
|
|
|
|
+ "task": row["task"],
|
|
|
|
|
+ "tags": json.loads(row["tags"]),
|
|
|
|
|
+ "scopes": json.loads(row["scopes"]),
|
|
|
|
|
+ "owner": row["owner"],
|
|
|
"content": row["content"],
|
|
"content": row["content"],
|
|
|
- "source": {
|
|
|
|
|
- "urls": row["source_urls"].split(",") if row["source_urls"] else [],
|
|
|
|
|
- "agent_id": row["source_agent_id"],
|
|
|
|
|
- "timestamp": row["source_timestamp"]
|
|
|
|
|
- },
|
|
|
|
|
- "eval": {
|
|
|
|
|
- "score": row["eval_score"],
|
|
|
|
|
- "helpful": row["eval_helpful"],
|
|
|
|
|
- "harmful": row["eval_harmful"],
|
|
|
|
|
- "helpful_history": [],
|
|
|
|
|
- "harmful_history": []
|
|
|
|
|
- },
|
|
|
|
|
- "metrics": {
|
|
|
|
|
- "helpful": row["metrics_helpful"],
|
|
|
|
|
- "harmful": row["metrics_harmful"]
|
|
|
|
|
- },
|
|
|
|
|
|
|
+ "source": json.loads(row["source"]),
|
|
|
|
|
+ "eval": json.loads(row["eval"]),
|
|
|
"created_at": row["created_at"],
|
|
"created_at": row["created_at"],
|
|
|
"updated_at": row["updated_at"]
|
|
"updated_at": row["updated_at"]
|
|
|
}
|
|
}
|
|
@@ -808,33 +815,37 @@ async def update_knowledge(knowledge_id: str, update: KnowledgeUpdateIn):
|
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
raise HTTPException(status_code=404, detail=f"Knowledge not found: {knowledge_id}")
|
|
|
|
|
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
- updates = {"updated_at": now}
|
|
|
|
|
|
|
+ eval_data = json.loads(row["eval"])
|
|
|
|
|
|
|
|
|
|
+ # 更新评分
|
|
|
if update.update_score is not None:
|
|
if update.update_score is not None:
|
|
|
- updates["eval_score"] = update.update_score
|
|
|
|
|
|
|
+ eval_data["score"] = update.update_score
|
|
|
|
|
|
|
|
|
|
+ # 添加有效案例
|
|
|
if update.add_helpful_case:
|
|
if update.add_helpful_case:
|
|
|
- helpful_history = json.loads(row["eval_helpful_history"] or "[]")
|
|
|
|
|
- helpful_history.append(update.add_helpful_case)
|
|
|
|
|
- updates["eval_helpful"] = row["eval_helpful"] + 1
|
|
|
|
|
- updates["eval_helpful_history"] = json.dumps(helpful_history, ensure_ascii=False)
|
|
|
|
|
- updates["metrics_helpful"] = row["metrics_helpful"] + 1
|
|
|
|
|
|
|
+ eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
|
|
+ if "helpful_history" not in eval_data:
|
|
|
|
|
+ eval_data["helpful_history"] = []
|
|
|
|
|
+ eval_data["helpful_history"].append(update.add_helpful_case)
|
|
|
|
|
|
|
|
|
|
+ # 添加有害案例
|
|
|
if update.add_harmful_case:
|
|
if update.add_harmful_case:
|
|
|
- harmful_history = json.loads(row["eval_harmful_history"] or "[]")
|
|
|
|
|
- harmful_history.append(update.add_harmful_case)
|
|
|
|
|
- updates["eval_harmful"] = row["eval_harmful"] + 1
|
|
|
|
|
- updates["eval_harmful_history"] = json.dumps(harmful_history, ensure_ascii=False)
|
|
|
|
|
- updates["metrics_harmful"] = row["metrics_harmful"] + 1
|
|
|
|
|
|
|
+ eval_data["harmful"] = eval_data.get("harmful", 0) + 1
|
|
|
|
|
+ if "harmful_history" not in eval_data:
|
|
|
|
|
+ eval_data["harmful_history"] = []
|
|
|
|
|
+ eval_data["harmful_history"].append(update.add_harmful_case)
|
|
|
|
|
|
|
|
|
|
+ # 知识进化
|
|
|
|
|
+ content = row["content"]
|
|
|
if update.evolve_feedback:
|
|
if update.evolve_feedback:
|
|
|
- evolved_content = await _evolve_knowledge_with_llm(row["content"], update.evolve_feedback)
|
|
|
|
|
- updates["content"] = evolved_content
|
|
|
|
|
- updates["metrics_helpful"] = updates.get("metrics_helpful", row["metrics_helpful"]) + 1
|
|
|
|
|
|
|
+ content = await _evolve_knowledge_with_llm(content, update.evolve_feedback)
|
|
|
|
|
+ eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
|
|
|
|
|
- set_clause = ", ".join(f"{k} = ?" for k in updates)
|
|
|
|
|
- values = list(updates.values()) + [knowledge_id]
|
|
|
|
|
- conn.execute(f"UPDATE knowledge SET {set_clause} WHERE id = ?", values)
|
|
|
|
|
|
|
+ # 更新数据库
|
|
|
|
|
+ conn.execute(
|
|
|
|
|
+ "UPDATE knowledge SET content = ?, eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
+ (content, json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
|
|
+ )
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
|
|
|
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
return {"status": "ok", "knowledge_id": knowledge_id}
|
|
@@ -851,8 +862,8 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
conn = get_db()
|
|
conn = get_db()
|
|
|
try:
|
|
try:
|
|
|
# 先处理无需进化的,收集需要进化的
|
|
# 先处理无需进化的,收集需要进化的
|
|
|
- evolution_tasks = [] # [(knowledge_id, old_content, feedback)]
|
|
|
|
|
- simple_updates = [] # [(knowledge_id, is_effective)]
|
|
|
|
|
|
|
+ evolution_tasks = [] # [(knowledge_id, old_content, feedback, eval_data)]
|
|
|
|
|
+ simple_updates = [] # [(knowledge_id, is_effective, eval_data)]
|
|
|
|
|
|
|
|
for item in batch.feedback_list:
|
|
for item in batch.feedback_list:
|
|
|
knowledge_id = item.get("knowledge_id")
|
|
knowledge_id = item.get("knowledge_id")
|
|
@@ -866,24 +877,25 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
if not row:
|
|
if not row:
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
|
|
+ eval_data = json.loads(row["eval"])
|
|
|
|
|
+
|
|
|
if is_effective and feedback:
|
|
if is_effective and feedback:
|
|
|
- evolution_tasks.append((knowledge_id, row["content"], feedback, row["metrics_helpful"]))
|
|
|
|
|
|
|
+ evolution_tasks.append((knowledge_id, row["content"], feedback, eval_data))
|
|
|
else:
|
|
else:
|
|
|
- simple_updates.append((knowledge_id, is_effective, row["metrics_helpful"], row["metrics_harmful"]))
|
|
|
|
|
|
|
+ simple_updates.append((knowledge_id, is_effective, eval_data))
|
|
|
|
|
|
|
|
# 执行简单更新
|
|
# 执行简单更新
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
- for knowledge_id, is_effective, cur_helpful, cur_harmful in simple_updates:
|
|
|
|
|
|
|
+ for knowledge_id, is_effective, eval_data in simple_updates:
|
|
|
if is_effective:
|
|
if is_effective:
|
|
|
- conn.execute(
|
|
|
|
|
- "UPDATE knowledge SET metrics_helpful = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (cur_helpful + 1, now, knowledge_id)
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
else:
|
|
else:
|
|
|
- conn.execute(
|
|
|
|
|
- "UPDATE knowledge SET metrics_harmful = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (cur_harmful + 1, now, knowledge_id)
|
|
|
|
|
- )
|
|
|
|
|
|
|
+ eval_data["harmful"] = eval_data.get("harmful", 0) + 1
|
|
|
|
|
+
|
|
|
|
|
+ conn.execute(
|
|
|
|
|
+ "UPDATE knowledge SET eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
+ (json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
|
|
+ )
|
|
|
|
|
|
|
|
# 并发执行知识进化
|
|
# 并发执行知识进化
|
|
|
if evolution_tasks:
|
|
if evolution_tasks:
|
|
@@ -891,10 +903,11 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
evolved_results = await asyncio.gather(
|
|
evolved_results = await asyncio.gather(
|
|
|
*[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _ in evolution_tasks]
|
|
*[_evolve_knowledge_with_llm(old, fb) for _, old, fb, _ in evolution_tasks]
|
|
|
)
|
|
)
|
|
|
- for (knowledge_id, _, _, cur_helpful), evolved_content in zip(evolution_tasks, evolved_results):
|
|
|
|
|
|
|
+ for (knowledge_id, _, _, eval_data), evolved_content in zip(evolution_tasks, evolved_results):
|
|
|
|
|
+ eval_data["helpful"] = eval_data.get("helpful", 0) + 1
|
|
|
conn.execute(
|
|
conn.execute(
|
|
|
- "UPDATE knowledge SET content = ?, metrics_helpful = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
- (evolved_content, cur_helpful + 1, now, knowledge_id)
|
|
|
|
|
|
|
+ "UPDATE knowledge SET content = ?, eval = ?, updated_at = ? WHERE id = ?",
|
|
|
|
|
+ (evolved_content, json.dumps(eval_data, ensure_ascii=False), now, knowledge_id)
|
|
|
)
|
|
)
|
|
|
|
|
|
|
|
conn.commit()
|
|
conn.commit()
|
|
@@ -904,27 +917,29 @@ async def batch_update_knowledge(batch: KnowledgeBatchUpdateIn):
|
|
|
|
|
|
|
|
|
|
|
|
|
@app.post("/api/knowledge/slim")
|
|
@app.post("/api/knowledge/slim")
|
|
|
-async def slim_knowledge(model: str = "anthropic/claude-sonnet-4-5"):
|
|
|
|
|
|
|
+async def slim_knowledge(model: str = "google/gemini-2.0-flash-001"):
|
|
|
"""知识库瘦身:合并语义相似知识"""
|
|
"""知识库瘦身:合并语义相似知识"""
|
|
|
conn = get_db()
|
|
conn = get_db()
|
|
|
try:
|
|
try:
|
|
|
- rows = conn.execute("SELECT * FROM knowledge ORDER BY metrics_helpful DESC").fetchall()
|
|
|
|
|
|
|
+ rows = conn.execute("SELECT * FROM knowledge").fetchall()
|
|
|
if len(rows) < 2:
|
|
if len(rows) < 2:
|
|
|
return {"status": "ok", "message": f"知识库仅有 {len(rows)} 条,无需瘦身"}
|
|
return {"status": "ok", "message": f"知识库仅有 {len(rows)} 条,无需瘦身"}
|
|
|
|
|
|
|
|
# 构造发给大模型的内容
|
|
# 构造发给大模型的内容
|
|
|
entries_text = ""
|
|
entries_text = ""
|
|
|
for row in rows:
|
|
for row in rows:
|
|
|
- entries_text += f"[ID: {row['id']}] [Tags: {row['tags_type']}] "
|
|
|
|
|
- entries_text += f"[Helpful: {row['metrics_helpful']}, Harmful: {row['metrics_harmful']}] [Score: {row['eval_score']}]\n"
|
|
|
|
|
- entries_text += f"Scenario: {row['scenario']}\n"
|
|
|
|
|
|
|
+ eval_data = json.loads(row["eval"])
|
|
|
|
|
+ types = json.loads(row["types"])
|
|
|
|
|
+ entries_text += f"[ID: {row['id']}] [Types: {','.join(types)}] "
|
|
|
|
|
+ entries_text += f"[Helpful: {eval_data.get('helpful', 0)}, Harmful: {eval_data.get('harmful', 0)}] [Score: {eval_data.get('score', 3)}]\n"
|
|
|
|
|
+ entries_text += f"Task: {row['task']}\n"
|
|
|
entries_text += f"Content: {row['content'][:200]}...\n\n"
|
|
entries_text += f"Content: {row['content'][:200]}...\n\n"
|
|
|
|
|
|
|
|
prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:
|
|
prompt = f"""你是一个 AI Agent 知识库管理员。以下是当前知识库的全部条目,请执行瘦身操作:
|
|
|
|
|
|
|
|
【任务】:
|
|
【任务】:
|
|
|
1. 识别语义高度相似或重复的知识,将它们合并为一条更精炼、更通用的知识。
|
|
1. 识别语义高度相似或重复的知识,将它们合并为一条更精炼、更通用的知识。
|
|
|
-2. 合并时保留 helpful 最高的那条的 ID(metrics_helpful 取各条之和)。
|
|
|
|
|
|
|
+2. 合并时保留 helpful 最高的那条的 ID(helpful 取各条之和)。
|
|
|
3. 对于独立的、无重复的知识,保持原样不动。
|
|
3. 对于独立的、无重复的知识,保持原样不动。
|
|
|
|
|
|
|
|
【当前知识库】:
|
|
【当前知识库】:
|
|
@@ -933,11 +948,11 @@ async def slim_knowledge(model: str = "anthropic/claude-sonnet-4-5"):
|
|
|
【输出格式要求】:
|
|
【输出格式要求】:
|
|
|
严格按以下格式输出每条知识,条目之间用 === 分隔:
|
|
严格按以下格式输出每条知识,条目之间用 === 分隔:
|
|
|
ID: <保留的id>
|
|
ID: <保留的id>
|
|
|
-TAGS: <逗号分隔的type列表>
|
|
|
|
|
|
|
+TYPES: <逗号分隔的type列表>
|
|
|
HELPFUL: <合并后的helpful计数>
|
|
HELPFUL: <合并后的helpful计数>
|
|
|
HARMFUL: <合并后的harmful计数>
|
|
HARMFUL: <合并后的harmful计数>
|
|
|
SCORE: <评分>
|
|
SCORE: <评分>
|
|
|
-SCENARIO: <场景描述>
|
|
|
|
|
|
|
+TASK: <任务描述>
|
|
|
CONTENT: <合并后的知识内容>
|
|
CONTENT: <合并后的知识内容>
|
|
|
===
|
|
===
|
|
|
|
|
|
|
@@ -966,15 +981,16 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
continue
|
|
continue
|
|
|
|
|
|
|
|
lines = block.split("\n")
|
|
lines = block.split("\n")
|
|
|
- kid, tags, helpful, harmful, score, scenario, content_lines = None, "", 0, 0, 3, "", []
|
|
|
|
|
|
|
+ kid, types, helpful, harmful, score, task, content_lines = None, [], 0, 0, 3, "", []
|
|
|
current_field = None
|
|
current_field = None
|
|
|
|
|
|
|
|
for line in lines:
|
|
for line in lines:
|
|
|
if line.startswith("ID:"):
|
|
if line.startswith("ID:"):
|
|
|
kid = line[3:].strip()
|
|
kid = line[3:].strip()
|
|
|
current_field = None
|
|
current_field = None
|
|
|
- elif line.startswith("TAGS:"):
|
|
|
|
|
- tags = line[5:].strip()
|
|
|
|
|
|
|
+ elif line.startswith("TYPES:"):
|
|
|
|
|
+ types_str = line[6:].strip()
|
|
|
|
|
+ types = [t.strip() for t in types_str.split(",") if t.strip()]
|
|
|
current_field = None
|
|
current_field = None
|
|
|
elif line.startswith("HELPFUL:"):
|
|
elif line.startswith("HELPFUL:"):
|
|
|
try:
|
|
try:
|
|
@@ -994,25 +1010,25 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
except Exception:
|
|
except Exception:
|
|
|
score = 3
|
|
score = 3
|
|
|
current_field = None
|
|
current_field = None
|
|
|
- elif line.startswith("SCENARIO:"):
|
|
|
|
|
- scenario = line[9:].strip()
|
|
|
|
|
- current_field = "scenario"
|
|
|
|
|
|
|
+ elif line.startswith("TASK:"):
|
|
|
|
|
+ task = line[5:].strip()
|
|
|
|
|
+ current_field = "task"
|
|
|
elif line.startswith("CONTENT:"):
|
|
elif line.startswith("CONTENT:"):
|
|
|
content_lines.append(line[8:].strip())
|
|
content_lines.append(line[8:].strip())
|
|
|
current_field = "content"
|
|
current_field = "content"
|
|
|
- elif current_field == "scenario":
|
|
|
|
|
- scenario += "\n" + line
|
|
|
|
|
|
|
+ elif current_field == "task":
|
|
|
|
|
+ task += "\n" + line
|
|
|
elif current_field == "content":
|
|
elif current_field == "content":
|
|
|
content_lines.append(line)
|
|
content_lines.append(line)
|
|
|
|
|
|
|
|
if kid and content_lines:
|
|
if kid and content_lines:
|
|
|
new_entries.append({
|
|
new_entries.append({
|
|
|
"id": kid,
|
|
"id": kid,
|
|
|
- "tags": tags,
|
|
|
|
|
|
|
+ "types": types if types else ["strategy"],
|
|
|
"helpful": helpful,
|
|
"helpful": helpful,
|
|
|
"harmful": harmful,
|
|
"harmful": harmful,
|
|
|
"score": score,
|
|
"score": score,
|
|
|
- "scenario": scenario.strip(),
|
|
|
|
|
|
|
+ "task": task.strip(),
|
|
|
"content": "\n".join(content_lines).strip()
|
|
"content": "\n".join(content_lines).strip()
|
|
|
})
|
|
})
|
|
|
|
|
|
|
@@ -1023,18 +1039,40 @@ REPORT: 原有 X 条,合并后 Y 条,精简了 Z 条。
|
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
now = datetime.now(timezone.utc).isoformat()
|
|
|
conn.execute("DELETE FROM knowledge")
|
|
conn.execute("DELETE FROM knowledge")
|
|
|
for e in new_entries:
|
|
for e in new_entries:
|
|
|
|
|
+ eval_data = {
|
|
|
|
|
+ "score": e["score"],
|
|
|
|
|
+ "helpful": e["helpful"],
|
|
|
|
|
+ "harmful": e["harmful"],
|
|
|
|
|
+ "confidence": 0.9,
|
|
|
|
|
+ "helpful_history": [],
|
|
|
|
|
+ "harmful_history": []
|
|
|
|
|
+ }
|
|
|
|
|
+ source = {
|
|
|
|
|
+ "name": "slim",
|
|
|
|
|
+ "category": "exp",
|
|
|
|
|
+ "urls": [],
|
|
|
|
|
+ "agent_id": "slim",
|
|
|
|
|
+ "submitted_by": "system",
|
|
|
|
|
+ "timestamp": now
|
|
|
|
|
+ }
|
|
|
conn.execute(
|
|
conn.execute(
|
|
|
"""INSERT INTO knowledge
|
|
"""INSERT INTO knowledge
|
|
|
- (id, message_id, tags_type, scenario, content,
|
|
|
|
|
- source_urls, source_agent_id, source_timestamp,
|
|
|
|
|
- eval_score, eval_helpful, eval_harmful,
|
|
|
|
|
- eval_helpful_history, eval_harmful_history,
|
|
|
|
|
- metrics_helpful, metrics_harmful, created_at, updated_at)
|
|
|
|
|
- VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
- (e["id"], "", e["tags"], e["scenario"], e["content"],
|
|
|
|
|
- "", "slim", now,
|
|
|
|
|
- e["score"], 0, 0, "[]", "[]",
|
|
|
|
|
- e["helpful"], e["harmful"], now, now)
|
|
|
|
|
|
|
+ (id, message_id, types, task, tags, scopes, owner, content, source, eval, created_at, updated_at)
|
|
|
|
|
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
|
|
|
|
|
+ (
|
|
|
|
|
+ e["id"],
|
|
|
|
|
+ "",
|
|
|
|
|
+ json.dumps(e["types"]),
|
|
|
|
|
+ e["task"],
|
|
|
|
|
+ json.dumps({}),
|
|
|
|
|
+ json.dumps(["org:cybertogether"]),
|
|
|
|
|
+ "agent:slim",
|
|
|
|
|
+ e["content"],
|
|
|
|
|
+ json.dumps(source, ensure_ascii=False),
|
|
|
|
|
+ json.dumps(eval_data, ensure_ascii=False),
|
|
|
|
|
+ now,
|
|
|
|
|
+ now
|
|
|
|
|
+ )
|
|
|
)
|
|
)
|
|
|
conn.commit()
|
|
conn.commit()
|
|
|
|
|
|