often 5 hónapja
szülő
commit
d9c90a980b

+ 1 - 1
recommend-model-produce/src/main/python/models/dssm/config_ps.yaml

@@ -38,7 +38,7 @@ hyper_parameters:
   optimizer:
     class: adam
     learning_rate: 0.001
-    strategy: sync
+    strategy: async
   trigram_d: 2900
   neg_num: 1
   slice_end: 8

+ 12 - 8
recommend-model-produce/src/main/python/tools/static_ps_trainer.py

@@ -100,9 +100,9 @@ class Main(object):
         self.pure_bf16 = self.config['pure_bf16']
 
     def run(self):
-        logger.info("Begin Debug33311111") 
+        logger.info("Begin 11111111") 
         self.init_fleet_with_gloo()
-        logger.info("Begin Debug333222222") 
+        logger.info("Begin 22222222") 
         self.network()
         if fleet.is_server():
             self.run_server()
@@ -115,13 +115,18 @@ class Main(object):
     def init_fleet_with_gloo(use_gloo=True):
         if use_gloo:
             os.environ["PADDLE_WITH_GLOO"] = "0"
+            logger.info("Begin 11111111222222") 
             role = role_maker.PaddleCloudRoleMaker(
                 is_collective=False,
                 init_gloo=False
             ) 
-         
+            logger.info("Begin 11111111333333") 
             fleet.init(role)
-
+            role = fleet._role_maker  # inspect the role maker fleet holds after init
+            
+            logger.info("is_collective: %s", role._is_collective)
+            logger.info("is_first_worker: %s", role._is_first_worker())
+            logger.info("worker_index: %s", role._worker_index())
         else:
             fleet.init()
 
@@ -323,12 +328,11 @@ class Main(object):
 
 
 if __name__ == "__main__":
-    logger.info("Begin Debug111")  
     paddle.enable_static()
-    logger.info("Begin Debug222")  
+ 
     config = parse_args()
     os.environ["CPU_NUM"] = str(config.get("runner.thread_num"))
     benchmark_main = Main(config)
-    logger.info("Begin Debug333")  
+ 
     benchmark_main.run()
-    logger.info("Begin Debug4444")  
+