|
@@ -14,6 +14,7 @@ class DSSMReader(MultiSlotDataGenerator):
|
|
try:
|
|
try:
|
|
# 按tab分割样本的各个字段
|
|
# 按tab分割样本的各个字段
|
|
sample_values = line.rstrip('\n').split('\t')
|
|
sample_values = line.rstrip('\n').split('\t')
|
|
|
|
+ sys.stderr.write(f"sample_values length: {len(sample_values)}\n")
|
|
if len(sample_values) == 6: # 训练格式
|
|
if len(sample_values) == 6: # 训练格式
|
|
sample_id, label, vid_left,vid_right,left_features, right_features = sample_values
|
|
sample_id, label, vid_left,vid_right,left_features, right_features = sample_values
|
|
# 转换label为整数
|
|
# 转换label为整数
|
|
@@ -26,7 +27,7 @@ class DSSMReader(MultiSlotDataGenerator):
|
|
# 验证特征维度
|
|
# 验证特征维度
|
|
if len(left_features) != self.feature_dim or len(right_features) != self.feature_dim:
|
|
if len(left_features) != self.feature_dim or len(right_features) != self.feature_dim:
|
|
return None
|
|
return None
|
|
- sys.stderr.write(f"left_features length: {len(left_features)}\n")
|
|
|
|
|
|
+
|
|
# 构建输出列表
|
|
# 构建输出列表
|
|
output = []
|
|
output = []
|
|
#output.append(("sample_id", [sample_id])) # 样本ID
|
|
#output.append(("sample_id", [sample_id])) # 样本ID
|