丁云鹏 5 mēneši atpakaļ
vecāks
revīzija
89feda05e2

+ 5 - 5
recommend-model-produce/src/main/python/models/dssm/config_ps_hdfs.yaml

@@ -13,20 +13,20 @@
 # limitations under the License.
 
 runner:
-  train_data_dir: "/dypdemo"
+  train_data_dir: "/dw/recommend/model/55_dssm_i2i_traindata/"
   train_reader_path: "bq_reader_train_ps"  # importlib format
-  train_batch_size: 8
+  train_batch_size: 64
   model_save_path: "output_model_dssm"
   split_file_list: true
 
   reader_type: "QueueDataset"  # DataLoader / QueueDataset / RecDataset
   pipe_command: "python bq_reader_train_ps.py"
-  thread_num: 1
+  thread_num: 2
   sync_mode: "async"
 
 
   use_gpu: False
-  epochs: 1
+  epochs: 3
   print_interval: 1
   
   test_data_dir: "data/test"
@@ -37,7 +37,7 @@ runner:
   infer_end_epoch: 1
   infer_reader_type: "QueueDataset"
 
-  oss_object_name: "dyp/dssm_demo.tar.gz"
+  oss_object_name: "dyp/dssm.tar.gz"
 
 hyper_parameters:
   optimizer:

+ 1 - 1
recommend-model-produce/src/main/python/models/dssm/net.py

@@ -176,7 +176,7 @@ class DSSMLayer(nn.Layer):
 
 
     def forward(self, left_features, right_features, is_infer=False):
-        paddle.static.Print(left_features, message="left_features shape and value:")
+        # paddle.static.Print(left_features, message="left_features shape and value:")
         if is_infer:
             left_vec = self.get_leftvectors(left_features)
             return left_vec

+ 3 - 2
recommend-model-produce/src/main/python/tools/utils/static_ps/reader_helper_hdfs.py

@@ -80,7 +80,9 @@ def get_infer_reader(input_var, config):
 
 def get_file_list(data_path, config, file_extensions=['.gz']):
     all_files = []
-    sub_dirs,file_list = hdfs_client.ls_dir(data_path)    
+    sub_dirs,file_list = hdfs_client.ls_dir(data_path)   
+    sub_dirs = sub_dirs[-48:] 
+
     for sub_dir in sub_dirs:
         _, files = hdfs_client.ls_dir(sub_dir)
         for file in files:
@@ -88,7 +90,6 @@ def get_file_list(data_path, config, file_extensions=['.gz']):
             if file_extensions and not any(file.endswith(ext) for ext in file_extensions):
                 continue    
             all_files.append(file)
-
     all_files = all_files + file_list
     print(sub_dirs,all_files)
     # If the config specifies splitting the file list