丁云鹏 5 mesi fa
parent
commit
8f429f5c0c

+ 5 - 10
recommend-model-produce/src/main/python/tools/utils/static_ps/reader_helper_hdfs.py

@@ -79,22 +79,17 @@ def get_infer_reader(input_var, config):
 
 
 def get_file_list(data_path, config):
-    logger.info("data_path: {}".format(data_path))
-
-    cmd = "test -e {}".format(data_path)
-    ret, out = hdfs_client._run_cmd(cmd, redirect_stderr=True, retry_times=1)
-    logger.info("ret: {} out: {}".format(ret, out))
-    cmd = "ls " + data_path
-    ret, lines = hdfs_client._run_cmd(cmd)
-    logger.info("ret: {} lines: {}".format(ret, lines))
-
     dirs,file_list = hdfs_client.ls_dir(data_path)
     # 如果配置中指定了分割文件列表
     if config.get("runner.split_file_list"):
         logger.info("Split file list for worker {}".format(dist.get_rank()))
         file_list = get_file_shard(file_list)
     logger.info("File list: {}".format(file_list))
-    return file_list
+
+    base_url = f'hdfs://{configs["fs.default.name"]}'
+    full_paths = [base_url + file for file in file_list]
+
+    return full_paths
 
 
 def get_reader_generator(path, reader_name="Reader"):