|
@@ -0,0 +1,95 @@
|
|
|
|
+from dataclasses import dataclass
|
|
|
|
+from typing import List
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@dataclass
class Document:
    """Identity of a source document and the dataset it is filed under.

    The field names match the ``$doc_id`` / ``$dataset_id`` parameters used
    by ``QUERY`` in this module; presumably instances are unpacked into
    those parameters -- confirm against the caller.
    """

    # Document identifier; QUERY MERGEs the Document node on this value.
    doc_id: str
    # Dataset identifier; QUERY writes it onto the Document node on every run.
    dataset_id: int
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@dataclass
class GraphChunk:
    """Scalar attributes of one chunk as stored on a ``GraphChunk`` node.

    The field names match the parameters that ``QUERY`` in this module
    writes onto the ``GraphChunk`` node; presumably instances are unpacked
    into those parameters -- confirm against the caller.
    """

    # Key the GraphChunk node is MERGEd on in QUERY.
    # NOTE(review): presumably the primary key of the matching Milvus
    # vector record -- confirm.
    milvus_id: int
    # Written by QUERY only when the node is first created.
    chunk_id: int
    # Identifier of the owning document (same key as Document.doc_id).
    doc_id: str
    # Stored as plain node properties by QUERY.
    topic: str
    domain: str
    text_type: int
    task_type: str
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@dataclass
class ChunkRelations:
    """Names of the graph nodes a chunk should be linked to.

    ``QUERY`` in this module consumes ``$entities``, ``$concepts``,
    ``$keywords``, ``$domain_name`` and ``$topic_name``; presumably those
    parameters are filled from this class (``domain`` -> ``$domain_name``,
    ``topic`` -> ``$topic_name``) -- confirm against the caller.
    """

    # Each list element names one node to link the chunk to.
    entities: List[str]
    concepts: List[str]
    keywords: List[str]
    # Single names; QUERY skips the link when the value is null or blank.
    domain: str
    topic: str
|
|
|
|
+
|
|
|
|
+
|
|
|
|
# Cypher statement that upserts one chunk and everything it links to.
#
# Parameters read by the query:
#   $doc_id, $dataset_id                      -> Document node
#   $milvus_id, $chunk_id, $topic, $domain,
#   $text_type, $task_type                    -> GraphChunk node properties
#   $entities, $concepts, $keywords           -> lists of names (null allowed)
#   $domain_name, $topic_name                 -> single names (null/blank allowed)
#
# Every node and relationship is written with MERGE, so re-running the
# statement with the same parameters does not create duplicates.
#
# Sections 3-5 deliberately use FOREACH over a filtered list instead of
# UNWIND: UNWIND over an empty list (or a list whose items are all blank)
# produces zero rows, which would silently skip every later section, and a
# list of N items would make the later sections run N times.  FOREACH
# leaves the row count at exactly one.
QUERY = """
// 1) Document & GraphChunk
MERGE (d:Document {doc_id: $doc_id})
SET d.dataset_id = $dataset_id

MERGE (gc:GraphChunk {milvus_id: $milvus_id})
  ON CREATE SET gc.chunk_id = $chunk_id
SET gc.topic = $topic,
    gc.domain = $domain,
    gc.text_type = $text_type,
    gc.task_type = $task_type,
    gc.doc_id = $doc_id

MERGE (gc)-[:BELONGS_TO]->(d)
MERGE (d)-[:HAS_CHUNK]->(gc)

// 2) 参数准备
WITH gc,
     COALESCE($entities, []) AS entities,
     COALESCE($concepts, []) AS concepts,
     COALESCE($keywords, []) AS keywords,
     $domain_name AS domain_name,
     $topic_name AS topic_name

// 3) Entities
FOREACH (e_name IN [x IN entities WHERE x IS NOT NULL AND TRIM(x) <> "" | TRIM(x)] |
  MERGE (e:Entity {name: e_name})
  MERGE (gc)-[:HAS_ENTITY]->(e)
)

// 4) Concepts
FOREACH (c_name IN [x IN concepts WHERE x IS NOT NULL AND TRIM(x) <> "" | TRIM(x)] |
  MERGE (co:Concept {name: c_name})
  MERGE (gc)-[:HAS_CONCEPT]->(co)
)

// 5) Keywords
FOREACH (k_name IN [x IN keywords WHERE x IS NOT NULL AND TRIM(x) <> "" | TRIM(x)] |
  MERGE (k:Keyword {name: k_name})
  MERGE (gc)-[:HAS_KEYWORD]->(k)
)

// 6) Domain(条件执行,用 FOREACH 替代 CALL)
WITH gc, domain_name, topic_name
FOREACH (_ IN CASE WHEN domain_name IS NOT NULL AND TRIM(domain_name) <> "" THEN [1] ELSE [] END |
  MERGE (d_node:Domain {name: TRIM(domain_name)})
  MERGE (gc)-[:HAS_DOMAIN]->(d_node)
)

// 7) Topic(条件执行,用 FOREACH 替代 CALL)
WITH gc, topic_name
FOREACH (_ IN CASE WHEN topic_name IS NOT NULL AND TRIM(topic_name) <> "" THEN [1] ELSE [] END |
  MERGE (t:Topic {name: TRIM(topic_name)})
  MERGE (gc)-[:HAS_TOPIC]->(t)
)
"""
|