# Graph data models (Document, GraphChunk, ChunkRelations) and the Cypher upsert statement QUERY.
from dataclasses import dataclass
from typing import List
@dataclass
class Document:
    """Record describing a source document.

    The field names match the ``$doc_id`` and ``$dataset_id`` parameters
    of ``QUERY``, which merges a ``Document`` node keyed on ``doc_id``.

    Attributes:
        doc_id: Document identifier.
        dataset_id: Identifier of the dataset associated with the document.
    """

    doc_id: str
    dataset_id: int
@dataclass
class GraphChunk:
    """Record describing one chunk of a document.

    The field names match the scalar parameters that ``QUERY`` writes onto
    the ``GraphChunk`` node, which is merged on ``milvus_id``.

    Attributes:
        milvus_id: Identifier used as the merge key of the chunk node
            (presumably the id of the chunk's vector in Milvus; confirm
            against the caller).
        chunk_id: Chunk identifier.
        doc_id: Identifier of the document the chunk belongs to.
        topic: Topic label stored on the chunk.
        domain: Domain label stored on the chunk.
        text_type: Integer code for the kind of text (code meanings are
            not defined in this file).
        task_type: Task type label stored on the chunk.
    """

    milvus_id: int
    chunk_id: int
    doc_id: str
    topic: str
    domain: str
    text_type: int
    task_type: str
@dataclass
class ChunkRelations:
    """Names of the graph nodes a chunk should be linked to.

    Attributes:
        entities: Entity names.
        concepts: Concept names.
        keywords: Keyword names.
        domain: Domain name (presumably passed to ``QUERY`` as
            ``$domain_name``; confirm against the caller).
        topic: Topic name (presumably passed to ``QUERY`` as
            ``$topic_name``; confirm against the caller).
    """

    entities: List[str]
    concepts: List[str]
    keywords: List[str]
    domain: str
    topic: str
# Cypher statement that upserts one chunk and its relations.
#
# Parameters: $doc_id, $dataset_id, $milvus_id, $chunk_id, $topic, $domain,
# $text_type, $task_type, $entities, $concepts, $keywords, $domain_name,
# $topic_name.
#
# Steps:
#   1) MERGE the Document (keyed on doc_id) and the GraphChunk (keyed on
#      milvus_id), refresh their properties and link them in both directions.
#      chunk_id is written only when the chunk node is first created.
#   2) Normalise the three name lists so that a null parameter becomes [].
#   3-5) For every non-blank, trimmed name MERGE an Entity / Concept / Keyword
#      node and link it to the chunk.
#   6-7) MERGE the Domain / Topic node and link it, only when the name is
#      non-null and non-blank.
#
# Steps 3-5 deliberately iterate with FOREACH over a filtered list instead of
# expanding the lists into rows: a row expansion over an empty (or all-blank)
# list leaves zero rows, which would silently skip every later step, and
# chaining several expansions repeats the later MERGEs once per earlier
# element. FOREACH keeps exactly one row flowing through the statement.
QUERY = """
// 1) Document & GraphChunk
MERGE (d:Document {doc_id: $doc_id})
SET d.dataset_id = $dataset_id
MERGE (gc:GraphChunk {milvus_id: $milvus_id})
ON CREATE SET gc.chunk_id = $chunk_id
SET gc.topic = $topic,
    gc.domain = $domain,
    gc.text_type = $text_type,
    gc.task_type = $task_type,
    gc.doc_id = $doc_id
MERGE (gc)-[:BELONGS_TO]->(d)
MERGE (d)-[:HAS_CHUNK]->(gc)
// 2) 参数准备
WITH gc,
     COALESCE($entities, []) AS entities,
     COALESCE($concepts, []) AS concepts,
     COALESCE($keywords, []) AS keywords,
     $domain_name AS domain_name,
     $topic_name AS topic_name
// 3) Entities
FOREACH (e_name IN [x IN entities WHERE TRIM(x) <> "" | TRIM(x)] |
  MERGE (e:Entity {name: e_name})
  MERGE (gc)-[:HAS_ENTITY]->(e)
)
// 4) Concepts
FOREACH (c_name IN [x IN concepts WHERE TRIM(x) <> "" | TRIM(x)] |
  MERGE (co:Concept {name: c_name})
  MERGE (gc)-[:HAS_CONCEPT]->(co)
)
// 5) Keywords
FOREACH (k_name IN [x IN keywords WHERE TRIM(x) <> "" | TRIM(x)] |
  MERGE (k:Keyword {name: k_name})
  MERGE (gc)-[:HAS_KEYWORD]->(k)
)
// 6) Domain(条件执行,用 FOREACH 替代 CALL)
WITH gc, domain_name, topic_name
FOREACH (_ IN CASE WHEN domain_name IS NOT NULL AND TRIM(domain_name) <> "" THEN [1] ELSE [] END |
  MERGE (d_node:Domain {name: TRIM(domain_name)})
  MERGE (gc)-[:HAS_DOMAIN]->(d_node)
)
// 7) Topic(条件执行,用 FOREACH 替代 CALL)
WITH gc, topic_name
FOREACH (_ IN CASE WHEN topic_name IS NOT NULL AND TRIM(topic_name) <> "" THEN [1] ELSE [] END |
  MERGE (t:Topic {name: TRIM(topic_name)})
  MERGE (gc)-[:HAS_TOPIC]->(t)
)
"""