| 
					
				 | 
			
			
				@@ -79,22 +79,17 @@ def get_infer_reader(input_var, config): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def get_file_list(data_path, config): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    logger.info("data_path: {}".format(data_path)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    cmd = "test -e {}".format(data_path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    ret, out = hdfs_client._run_cmd(cmd, redirect_stderr=True, retry_times=1) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    logger.info("ret: {} out: {}".format(ret, out)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    cmd = "ls " + data_path 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    ret, lines = hdfs_client._run_cmd(cmd) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    logger.info("ret: {} lines: {}".format(ret, lines)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     dirs,file_list = hdfs_client.ls_dir(data_path) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     # 如果配置中指定了分割文件列表 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     if config.get("runner.split_file_list"): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         logger.info("Split file list for worker {}".format(dist.get_rank())) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         file_list = get_file_shard(file_list) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     logger.info("File list: {}".format(file_list)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-    return file_list 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    base_url = f'hdfs://{configs["fs.default.name"]}' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    full_paths = [base_url + file for file in file_list] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    return full_paths 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 def get_reader_generator(path, reader_name="Reader"): 
			 |