# CollectionTrans.py (2.4 KB)

  1. from pymilvus import Collection, CollectionSchema, FieldSchema, DataType, connections
  2. # 连接到 Milvus
  3. connections.connect("default", host="127.0.0.1", port="19530")
  4. # 定义新的字段和 schema
  5. dim = 768 # 假设 dim 已经定义
  6. new_fields = [
  7. # 旧字段
  8. FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
  9. FieldSchema(name="title_vector",
  10. dtype=DataType.FLOAT_VECTOR, dim=dim),
  11. FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=256,
  12. description="视频标题"),
  13. FieldSchema(name="preview_times",
  14. dtype=DataType.INT64, description="预曝光次数"),
  15. FieldSchema(name="preview_users", dtype=DataType.INT64,
  16. description="预曝光用户数"),
  17. FieldSchema(name="view_times", dtype=DataType.INT64,
  18. description="曝光次数"),
  19. FieldSchema(name="view_users", dtype=DataType.INT64,
  20. description="曝光用户数"),
  21. FieldSchema(name="play_times", dtype=DataType.INT64,
  22. description="播放次数"),
  23. FieldSchema(name="play_users", dtype=DataType.INT64,
  24. description="播放用户数"),
  25. FieldSchema(name="share_times", dtype=DataType.INT64,
  26. description="分享次数"),
  27. FieldSchema(name="share_users", dtype=DataType.INT64,
  28. description="分享用户数"),
  29. FieldSchema(name="return_times",
  30. dtype=DataType.INT64, description="回看次数"),
  31. FieldSchema(name="return_users",
  32. dtype=DataType.INT64, description="回看用户数"),
  33. # 新字段
  34. FieldSchema(name="create_time", dtype=DataType.INT64, description="创建时间戳")
  35. ]
  36. new_schema = CollectionSchema(new_fields, description="两年内的分发过的视频标题")
  37. # 创建新 collection
  38. new_collection_name = "two_year_all_viewed_videos"
  39. new_collection = Collection(name=new_collection_name, schema=new_schema)
  40. # 从旧 collection 中读取数据并迁移
  41. old_collection_name = "two_year_all_video_titles"
  42. old_collection = Collection(name=old_collection_name)
  43. old_data = ... # 读取旧 collection 数据的逻辑
  44. # 为旧数据添加 '创建时间' 字段
  45. new_data = ...
  46. for data in old_data:
  47. data['创建时间'] = ... # 设置 '创建时间' 字段的值
  48. new_data.append(data)
  49. # 插入数据到新 collection
  50. new_collection.insert(new_data)
  51. # 验证新 collection 数据正确后,可以选择删除旧 collection
  52. # old_collection.drop()