|
@@ -80,7 +80,9 @@ def get_infer_reader(input_var, config):
|
|
|
|
|
|
def get_file_list(data_path, config, file_extensions=['.gz']):
|
|
|
all_files = []
|
|
|
- sub_dirs,file_list = hdfs_client.ls_dir(data_path)
|
|
|
+ sub_dirs,file_list = hdfs_client.ls_dir(data_path)
|
|
|
+ sub_dirs = sub_dirs[-48:]
|
|
|
+
|
|
|
for sub_dir in sub_dirs:
|
|
|
_, files = hdfs_client.ls_dir(sub_dir)
|
|
|
for file in files:
|
|
@@ -88,7 +90,6 @@ def get_file_list(data_path, config, file_extensions=['.gz']):
|
|
|
if file_extensions and not any(file.endswith(ext) for ext in file_extensions):
|
|
|
continue
|
|
|
all_files.append(file)
|
|
|
-
|
|
|
all_files = all_files + file_list
|
|
|
print(sub_dirs,all_files)
|
|
|
# 如果配置中指定了分割文件列表
|