from pymilvus import FieldSchema, DataType

# Field schemas for a Milvus vector-database collection.

# The three embedding fields below all use this dimensionality.
_VECTOR_DIM = 2560

# Every ARRAY field below is capped at this many elements.
_ARRAY_CAPACITY = 5


def _varchar(name, max_length, description):
    """Return a VARCHAR scalar field with the given length limit."""
    return FieldSchema(
        name=name,
        dtype=DataType.VARCHAR,
        max_length=max_length,
        description=description,
    )


def _varchar_array(name, max_length, description):
    """Return an ARRAY field of VARCHAR elements.

    ``max_length`` is passed alongside ``element_type=VARCHAR``; the array
    itself is limited to ``_ARRAY_CAPACITY`` entries.
    """
    return FieldSchema(
        name=name,
        dtype=DataType.ARRAY,
        element_type=DataType.VARCHAR,
        max_length=max_length,
        max_capacity=_ARRAY_CAPACITY,
        description=description,
    )


fields = [
    # Auto-increment primary key.
    FieldSchema(
        name="id",
        dtype=DataType.INT64,
        is_primary=True,
        auto_id=True,
        description="自增id",
    ),
    # Document id and the id of the chunk within that document.
    _varchar("doc_id", 64, "文档id"),
    FieldSchema(name="chunk_id", dtype=DataType.INT64, description="文档分块id"),
    # Three vector fields.
    *[
        FieldSchema(name=vector_name, dtype=DataType.FLOAT_VECTOR, dim=_VECTOR_DIM)
        for vector_name in ("vector_text", "vector_summary", "vector_questions")
    ],
    # Metadata: topic, domain, task type, summary.
    _varchar("topic", 255, "主题"),
    _varchar("domain", 100, "领域"),
    _varchar("task_type", 100, "任务类型"),
    _varchar("summary", 512, "总结"),
    # Metadata: keywords, main knowledge points, implied questions, named entities.
    _varchar_array("keywords", 100, "关键词"),
    _varchar_array("concepts", 100, "主要知识点"),
    _varchar_array("questions", 200, "隐含问题"),
    _varchar_array("entities", 200, "命名实体"),
    # Numeric metadata without descriptions.
    FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
    FieldSchema(name="tokens", dtype=DataType.INT64),
]

__all__ = ["fields"]