Browse Source

Read training data from HDFS

丁云鹏 5 months ago
parent
commit
284fc67a8d

+ 1 - 1
recommend-model-produce/src/main/python/models/dssm/config_ps_hdfs.yaml

@@ -24,7 +24,7 @@ runner:
   sync_mode: "async"
 
   use_gpu: False
-  epochs: 10
+  epochs: 1
   print_interval: 1
   
   test_data_dir: "data/test"

+ 6 - 0
recommend-model-produce/src/main/python/tools/static_ps_trainer_v2.py

@@ -32,6 +32,8 @@ import ast
 import numpy as np
 import struct
 from utils.utils_single import auc
+from utils.oss_client import OssClient
+import compress
 
 __dir__ = os.path.dirname(os.path.abspath(__file__))
 sys.path.append(os.path.abspath(os.path.join(__dir__, '..')))
@@ -230,6 +232,10 @@ class Main(object):
                     [feed.name for feed in self.inference_feed_var],
                     [self.inference_target_var], self.exe)
 
+            compress.compress_tar(model_dir, "test")
+            client = OssClient("art-recommend")
+            client.put_object_from_file("dyp/test.tar.gz", "test.tar.gz")
+
         if reader_type == "InmemoryDataset":
             self.reader.release_memory()
 

+ 4 - 0
recommend-model-produce/src/main/python/tools/utils/static_ps/reader_helper_hdfs.py

@@ -84,6 +84,10 @@ def get_file_list(data_path, config):
     cmd = "test -e {}".format(data_path)
     ret, out = hdfs_client._run_cmd(cmd, redirect_stderr=True, retry_times=1)
     logger.info("ret: {} out: {}".format(ret, out))
+    cmd = "ls " + data_path
+    ret, lines = hdfs_client._run_cmd(cmd)
+    logger.info("ret: {} out: {}".format(ret, lines))
+
     dirs,file_list = hdfs_client.ls_dir(data_path)
     # 如果配置中指定了分割文件列表
     if config.get("runner.split_file_list"):