|
@@ -5,13 +5,17 @@ class DSSMReader(MultiSlotDataGenerator):
|
|
|
def __init__(self):
    """Initialize the reader and its per-instance state.

    Runs the parent class initializer, then sets the feature dimension
    and resets the processed-line counter.
    """
    super().__init__()
    # Feature dimension used by this reader.
    self.feature_dim = 5
    # Running count of lines seen; incremented in line_process.
    self.data_count = 0
|
|
|
def init(self, config=None):
    """Accept an optional ``config`` and do nothing.

    The argument is ignored and the method always returns ``None``.
    """
    return None
|
|
|
|
|
|
def line_process(self, line):
|
|
|
try:
|
|
|
+ self.data_count += 1
|
|
|
+ if self.data_count % 100 == 0: # 每100条打印一次
|
|
|
+ print(f"Processing line {self.data_count}: {line[:100]}...") # 打印前100个字符
|
|
|
# 按tab分割样本的各个字段
|
|
|
+
|
|
|
sample_values = line.rstrip('\n').split('\t')
|
|
|
if len(sample_values) == 6: # 训练格式
|
|
|
sample_id, label, vid_left,vid_right,left_features, right_features = sample_values
|