|
@@ -1,6 +1,7 @@
|
|
|
from paddle.distributed.fleet.data_generator import MultiSlotDataGenerator
|
|
|
import sys
|
|
|
|
|
|
+
|
|
|
class DSSMReader(MultiSlotDataGenerator):
|
|
|
def __init__(self):
|
|
|
super(DSSMReader, self).__init__()
|
|
@@ -12,7 +13,7 @@ class DSSMReader(MultiSlotDataGenerator):
|
|
|
def line_process(self, line):
|
|
|
try:
|
|
|
# Split the sample into its individual fields on tab characters
|
|
|
-
|
|
|
+ sys.stderr.write(f"processing line: {line}\n")
|
|
|
sample_values = line.rstrip('\n').split('\t')
|
|
|
if len(sample_values) == 6: # 训练格式
|
|
|
sample_id, label, vid_left,vid_right,left_features, right_features = sample_values
|