|
@@ -2,34 +2,57 @@ from pymilvus import FieldSchema, DataType
|
|
|
|
|
|
# milvus 向量数据库
|
|
# milvus 向量数据库
|
|
fields = [
|
|
fields = [
|
|
- FieldSchema(name="chunk_id", dtype=DataType.INT64, is_primary=True, auto_id=False),
|
|
|
|
- FieldSchema(name="doc_id", dtype=DataType.VARCHAR, max_length=64),
|
|
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="id",
|
|
|
|
+ dtype=DataType.INT64,
|
|
|
|
+ is_primary=True,
|
|
|
|
+ auto_id=True,
|
|
|
|
+ description="自增逐渐id",
|
|
|
|
+ ),
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="doc_id", dtype=DataType.VARCHAR, max_length=64, description="文档id"
|
|
|
|
+ ),
|
|
|
|
+ FieldSchema(name="chunk_id", dtype=DataType.INT64, description="文档分块id"),
|
|
# 三种向量字段
|
|
# 三种向量字段
|
|
FieldSchema(name="vector_text", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
FieldSchema(name="vector_text", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
FieldSchema(name="vector_summary", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
FieldSchema(name="vector_summary", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
FieldSchema(name="vector_questions", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
FieldSchema(name="vector_questions", dtype=DataType.FLOAT_VECTOR, dim=2560),
|
|
# metadata
|
|
# metadata
|
|
- FieldSchema(name="topic", dtype=DataType.VARCHAR, max_length=255),
|
|
|
|
- FieldSchema(name="domain", dtype=DataType.VARCHAR, max_length=100),
|
|
|
|
- FieldSchema(name="task_type", dtype=DataType.VARCHAR, max_length=100),
|
|
|
|
- FieldSchema(name="summary", dtype=DataType.VARCHAR, max_length=512),
|
|
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="topic", dtype=DataType.VARCHAR, max_length=255, description="主题"
|
|
|
|
+ ),
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="domain", dtype=DataType.VARCHAR, max_length=100, description="领域"
|
|
|
|
+ ),
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="task_type", dtype=DataType.VARCHAR, max_length=100, description="任务类型"
|
|
|
|
+ ),
|
|
|
|
+ FieldSchema(
|
|
|
|
+ name="summary", dtype=DataType.VARCHAR, max_length=512, description="总结"
|
|
|
|
+ ),
|
|
FieldSchema(
|
|
FieldSchema(
|
|
name="keywords",
|
|
name="keywords",
|
|
dtype=DataType.ARRAY,
|
|
dtype=DataType.ARRAY,
|
|
element_type=DataType.VARCHAR,
|
|
element_type=DataType.VARCHAR,
|
|
max_length=100,
|
|
max_length=100,
|
|
|
|
+ max_capacity=5,
|
|
|
|
+ description="关键词",
|
|
),
|
|
),
|
|
FieldSchema(
|
|
FieldSchema(
|
|
name="concepts",
|
|
name="concepts",
|
|
dtype=DataType.ARRAY,
|
|
dtype=DataType.ARRAY,
|
|
element_type=DataType.VARCHAR,
|
|
element_type=DataType.VARCHAR,
|
|
max_length=100,
|
|
max_length=100,
|
|
|
|
+ max_capacity=5,
|
|
|
|
+ description="主要知识点",
|
|
),
|
|
),
|
|
FieldSchema(
|
|
FieldSchema(
|
|
name="questions",
|
|
name="questions",
|
|
dtype=DataType.ARRAY,
|
|
dtype=DataType.ARRAY,
|
|
element_type=DataType.VARCHAR,
|
|
element_type=DataType.VARCHAR,
|
|
max_length=200,
|
|
max_length=200,
|
|
|
|
+ max_capacity=5,
|
|
|
|
+ description="隐含问题",
|
|
),
|
|
),
|
|
FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
|
|
FieldSchema(name="topic_purity", dtype=DataType.FLOAT),
|
|
FieldSchema(name="tokens", dtype=DataType.INT64),
|
|
FieldSchema(name="tokens", dtype=DataType.INT64),
|