丁云鹏 5 месяцев назад
Родитель
Commit
daf09cca39

+ 6 - 14
recommend-model-produce/src/main/python/tools/static_ps_trainer_v2.py

@@ -15,7 +15,7 @@
 from __future__ import print_function
 import os
 os.environ['FLAGS_enable_pir_api'] = '0'
-from utils.static_ps.reader_helper_hdfs import get_reader, get_example_num, get_file_list, get_word_num
+from utils.static_ps.reader_helper_hdfs import get_reader
 from utils.static_ps.program_helper import get_model, get_strategy, set_dump_config
 from utils.static_ps.metric_helper import set_zero, get_global_auc
 from utils.static_ps.common_ps import YamlHelper, is_distributed_env
@@ -194,7 +194,7 @@ class Main(object):
                 self.dataset_train_loop(epoch)
 
             epoch_time = time.time() - epoch_start_time
-            epoch_speed = self.example_nums / epoch_time
+
             if use_auc is True:
                 global_auc = get_global_auc(paddle.static.global_scope(),
                                             self.model.stat_pos.name,
@@ -209,13 +209,13 @@ class Main(object):
                 set_zero(self.model.batch_stat_neg.name,
                          paddle.static.global_scope())
                 logger.info(
-                    "Epoch: {}, using time: {} second, ips: {} {}/sec. auc: {}".
-                    format(epoch, epoch_time, epoch_speed, self.count_method,
+                    "Epoch: {}, using time: {} second, ips: {}/sec. auc: {}".
+                    format(epoch, epoch_time, self.count_method,
                            global_auc))
             else:
                 logger.info(
-                    "Epoch: {}, using time {} second, ips {} {}/sec.".format(
-                        epoch, epoch_time, epoch_speed, self.count_method))
+                    "Epoch: {}, using time {} second, ips  {}/sec.".format(
+                        epoch, epoch_time, self.count_method))
 
             self.train_result_dict["speed"].append(epoch_speed)
 
@@ -244,14 +244,6 @@ class Main(object):
         self.example_nums = 0
         self.count_method = self.config.get("runner.example_count_method",
                                             "example")
-        if self.count_method == "example":
-            self.example_nums = get_example_num(self.file_list)
-        elif self.count_method == "word":
-            self.example_nums = get_word_num(self.file_list)
-        else:
-            raise ValueError(
-                "Set static_benchmark.example_count_method for example / word for example count."
-            )
 
     def dataset_train_loop(self, epoch):
         logger.info("Epoch: {}, Running Dataset Begin.".format(epoch))

+ 0 - 23
recommend-model-produce/src/main/python/tools/utils/static_ps/reader_helper_hdfs.py

@@ -93,29 +93,6 @@ def get_file_list(data_path, config):
     return file_list
 
 
-def get_example_num(file_list):
-    count = 0
-    for f in file_list:
-        last_count = count
-        for _, _ in enumerate(open(f, 'r')):
-            count += 1
-        logger.info("File: %s has %s examples" % (f, count - last_count))
-    logger.info("Total example: %s" % count)
-    return count
-
-
-def get_word_num(file_list):
-    count = 0
-    for f in file_list:
-        last_count = count
-        for index, line in enumerate(open(f, 'r')):
-            line = line.rstrip().split()
-            count += len(line)
-        logger.info("file: %s has %s words" % (f, count - last_count))
-    logger.info("Total words: %s" % count)
-    return count
-
-
 def get_reader_generator(path, reader_name="Reader"):
     reader_class = common_ps.lazy_instance_by_fliename(path, reader_name)()
     return reader_class