| 
														
															@@ -89,12 +89,12 @@ def get_file_list(data_path, config, file_extensions=['.gz']): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															                 continue     
														 | 
														
														 | 
														
															                 continue     
														 | 
													
												
											
												
													
														| 
														 | 
														
															             all_files.append(file) 
														 | 
														
														 | 
														
															             all_files.append(file) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															      
														 | 
														
														 | 
														
															      
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    print(sub_dirs,all_files) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    #print(sub_dirs,all_files) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     # 如果配置中指定了分割文件列表 
														 | 
														
														 | 
														
															     # 如果配置中指定了分割文件列表 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     if config.get("runner.split_file_list"): 
														 | 
														
														 | 
														
															     if config.get("runner.split_file_list"): 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         logger.info("Split file list for worker {}".format(dist.get_rank())) 
														 | 
														
														 | 
														
															         logger.info("Split file list for worker {}".format(dist.get_rank())) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															         all_files = fleet.util.get_file_shard(all_files) 
														 | 
														
														 | 
														
															         all_files = fleet.util.get_file_shard(all_files) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															-    logger.info("File list: {}".format(all_files)) 
														 | 
														
														 | 
														
															 
														 | 
													
												
											
												
													
														| 
														 | 
														
															 
														 | 
														
														 | 
														
															+    logger.info("File list: {}".format(sub_dirs)) 
														 | 
													
												
											
												
													
														| 
														 | 
														
															  
														 | 
														
														 | 
														
															  
														 | 
													
												
											
												
													
														| 
														 | 
														
															     base_url = f'{configs["fs.default.name"]}' 
														 | 
														
														 | 
														
															     base_url = f'{configs["fs.default.name"]}' 
														 | 
													
												
											
												
													
														| 
														 | 
														
															     full_paths = [base_url + file for file in all_files] 
														 | 
														
														 | 
														
															     full_paths = [base_url + file for file in all_files] 
														 |