|
@@ -28,14 +28,18 @@ class RecDataset(IterableDataset):
|
|
|
for file in self.file_list:
|
|
|
with open(file, "r") as rf:
|
|
|
for line in rf:
|
|
|
- output_list = []
|
|
|
- features = line.rstrip('\n').split('\t')
|
|
|
- query = [
|
|
|
- float(feature) for feature in features[0].split(',')
|
|
|
- ]
|
|
|
- output_list.append(np.array(query).astype('float32'))
|
|
|
- pos_doc = [
|
|
|
- float(feature) for feature in features[1].split(',')
|
|
|
- ]
|
|
|
- output_list.append(np.array(pos_doc).astype('float32'))
|
|
|
+ sample_values = line.rstrip('\n').split(' ')
|
|
|
+ sample_id, left_features = sample_values
|
|
|
+ # 处理左右视频特征
|
|
|
+ left_features = [float(x) for x in left_features.split(',')]
|
|
|
+ # 验证特征维度
|
|
|
+ if len(left_features) != self.feature_dim :
|
|
|
+ return None
|
|
|
+
|
|
|
+ # 构建输出列表
|
|
|
+ output = []
|
|
|
+ output.append(("sample_id", [sample_id])) # 样本ID
|
|
|
+ output.append(("left_features", left_features)) # 左视频特征
|
|
|
+
|
|
|
+
|
|
|
yield output_list
|