# models.py - dataclasses and the Cypher upsert query for the document/chunk graph.
  1. from dataclasses import dataclass
  2. from typing import List
  3. @dataclass
  4. class Document:
  5. doc_id: str
  6. dataset_id: int
  7. @dataclass
  8. class GraphChunk:
  9. milvus_id: int
  10. chunk_id: int
  11. doc_id: str
  12. topic: str
  13. domain: str
  14. text_type: int
  15. task_type: str
  16. @dataclass
  17. class ChunkRelations:
  18. entities: List[str]
  19. concepts: List[str]
  20. keywords: List[str]
  21. domain: str
  22. topic: str
  23. QUERY = """
  24. // 1) Document & GraphChunk
  25. MERGE (d:Document {doc_id: $doc_id})
  26. ON CREATE SET d.dataset_id = $dataset_id
  27. SET d.dataset_id = $dataset_id
  28. MERGE (gc:GraphChunk {milvus_id: $milvus_id})
  29. ON CREATE SET gc.chunk_id = $chunk_id, gc.doc_id = $doc_id
  30. SET gc.topic = $topic,
  31. gc.domain = $domain,
  32. gc.text_type = $text_type,
  33. gc.task_type = $task_type,
  34. gc.doc_id = $doc_id
  35. MERGE (gc)-[:BELONGS_TO]->(d)
  36. MERGE (d)-[:HAS_CHUNK]->(gc)
  37. // 2) 参数准备
  38. WITH gc,
  39. COALESCE($entities, []) AS entities,
  40. COALESCE($concepts, []) AS concepts,
  41. COALESCE($keywords, []) AS keywords,
  42. $domain_name AS domain_name,
  43. $topic_name AS topic_name
  44. // 3) Entities
  45. UNWIND entities AS e_name
  46. WITH gc, e_name, concepts, keywords, domain_name, topic_name
  47. WITH gc, TRIM(e_name) AS e_name, concepts, keywords, domain_name, topic_name
  48. WHERE e_name <> ""
  49. MERGE (e:Entity {name: e_name})
  50. MERGE (gc)-[:HAS_ENTITY]->(e)
  51. // 4) Concepts
  52. WITH gc, concepts, keywords, domain_name, topic_name
  53. UNWIND concepts AS c_name
  54. WITH gc, c_name, keywords, domain_name, topic_name
  55. WITH gc, TRIM(c_name) AS c_name, keywords, domain_name, topic_name
  56. WHERE c_name <> ""
  57. MERGE (co:Concept {name: c_name})
  58. MERGE (gc)-[:HAS_CONCEPT]->(co)
  59. // 5) Keywords
  60. WITH gc, keywords, domain_name, topic_name
  61. UNWIND keywords AS k_name
  62. WITH gc, k_name, domain_name, topic_name
  63. WITH gc, TRIM(k_name) AS k_name, domain_name, topic_name
  64. WHERE k_name <> ""
  65. MERGE (k:Keyword {name: k_name})
  66. MERGE (gc)-[:HAS_KEYWORD]->(k)
  67. // 6) Domain(条件执行,用 FOREACH 替代 CALL)
  68. WITH gc, domain_name, topic_name
  69. FOREACH (_ IN CASE WHEN domain_name IS NOT NULL AND TRIM(domain_name) <> "" THEN [1] ELSE [] END |
  70. MERGE (d_node:Domain {name: TRIM(domain_name)})
  71. MERGE (gc)-[:HAS_DOMAIN]->(d_node)
  72. )
  73. // 7) Topic(条件执行,用 FOREACH 替代 CALL)
  74. WITH gc, topic_name
  75. FOREACH (_ IN CASE WHEN topic_name IS NOT NULL AND TRIM(topic_name) <> "" THEN [1] ELSE [] END |
  76. MERGE (t:Topic {name: TRIM(topic_name)})
  77. MERGE (gc)-[:HAS_TOPIC]->(t)
  78. )
  79. """