package com.aliyun.odps.spark.examples.myUtils

/**
 * Author: zhangbo58
 * Description: utility functions for working with HDFS paths: listing files
 * and directories, checking existence and job output, deleting paths,
 * date-based retention cleanup, and setting permissions.
 */

import org.apache.commons.lang.time.DateUtils
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.permission.FsPermission
import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}

import scala.collection.mutable.ArrayBuffer

object MyHdfsUtils {
  def main(args: Array[String]): Unit = {
    val path = "zhangbo58/"

    println("First-level files and directories under the path:")
    getFilesAndDirs(path).foreach(println)
    println("First-level files under the path:")
    getFiles(path).foreach(println)
    println("First-level directories under the path:")
    getDirs(path).foreach(println)
    println("All files under the path, recursively:")
    getAllFiles(path).foreach(println)
  }

  /**
   * Returns a FileSystem handle built from the default Hadoop configuration.
   * Note that `path` is not used to pick the filesystem; the default
   * filesystem from the cluster configuration is returned.
   */
  def getHdfs(path: String): FileSystem = {
    val conf = new Configuration()
    FileSystem.get(conf)
  }
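
  // Sketch of an alternative, in case the filesystem ever needs to be resolved
  // from the path's own scheme rather than the default configuration (not what
  // getHdfs above does):
  //   FileSystem.get(java.net.URI.create(path), new Configuration())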

  /** First-level files and directories under `path`. */
  def getFilesAndDirs(path: String): Array[Path] = {
    val statuses = getHdfs(path).listStatus(new Path(path))
    FileUtil.stat2Paths(statuses)
  }

  /** First-level files (not directories) under `path`, as full path strings. */
  def getFiles(path: String): Array[String] = {
    getFilesAndDirs(path).filter(getHdfs(path).getFileStatus(_).isFile())
      .map(_.toString)
  }

  /** First-level directories under `path`, as full path strings. */
  def getDirs(path: String): Array[String] = {
    getFilesAndDirs(path).filter(getHdfs(path).getFileStatus(_).isDirectory)
      .map(_.toString)
  }

  /** Recursively collects every file under `path`. */
  def getAllFiles(path: String): ArrayBuffer[String] = {
    val arr = ArrayBuffer[String]()
    val hdfs = getHdfs(path)
    val children = getFilesAndDirs(path)
    children.foreach(child => {
      if (hdfs.getFileStatus(child).isFile())
        arr += child.toString
      else {
        // Descend into sub-directories.
        arr ++= getAllFiles(child.toString)
      }
    })
    arr
  }
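
  // Usage sketch: the recursive listing can be fed straight into Spark, since
  // textFile accepts a comma-separated list of paths (assumes an existing
  // SparkContext `sc` and a hypothetical log directory):
  //   val rdd = sc.textFile(getAllFiles("hdfs:///user/logs/").mkString(","))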

  /**
   * Returns true when `path` exists and contains at least one non-empty
   * "part-" or "_SUCCESS" file, i.e. a job has already written output there.
   */
  def ifHDFSHasData(path: String): Boolean = {
    val hdfs_path = new org.apache.hadoop.fs.Path(path)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(new org.apache.hadoop.conf.Configuration())

    var rst = false
    if (hdfs.exists(hdfs_path)) {
      val statusList = hdfs.listStatus(hdfs_path)
      for (status <- statusList
           if !rst && (status.getPath.toString.contains("part-") || status.getPath.toString.contains("_SUCCESS"))) {
        if (status.getLen > 0) {
          rst = true
        }
      }
    }
    rst
  }
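
  // Usage sketch (hypothetical `outputPath` written by an earlier job): skip
  // recomputation when non-empty part files are already present.
  //   if (!ifHDFSHasData(outputPath)) {
  //     // (re)run the job that produces outputPath
  //   }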

  /** Recursively deletes `path` if it exists. */
  def delete_hdfs_path(path: String): Unit = {
    val hdfs_path = new org.apache.hadoop.fs.Path(path)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(new org.apache.hadoop.conf.Configuration())

    if (hdfs.exists(hdfs_path)) {
      hdfs.delete(hdfs_path, true)
    }
  }
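
  // Typical call pattern (sketch with a hypothetical RDD `result` and output
  // path `outPath`): clear any previous output first, because saveAsTextFile
  // fails if the target directory already exists.
  //   delete_hdfs_path(outPath)
  //   result.saveAsTextFile(outPath)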

  /** Returns true when `path` exists on HDFS. */
  def hdfs_exits(path: String): Boolean = {
    val hdfs_path = new org.apache.hadoop.fs.Path(path)
    val hdfs = org.apache.hadoop.fs.FileSystem.get(new org.apache.hadoop.conf.Configuration())

    hdfs.exists(hdfs_path)
  }

  /**
   * Deletes date-named entries under `path`, keeping only the most recent
   * `keepDays` days counted back from `dateStr`.
   * Example: keepDays=2, dateStr=20191015 => keep the data of 20191015 and 20191014.
   */
  def hdfs_delete_not_keep_days(
      path: String,
      keepDays: Int,
      dateStr: String,
      pattern: String = "yyyyMMdd"
  ): Unit = {
    val file_list = this.getFiles(path)
    println("hdfs_delete_not_keep_days-file_list")
    file_list.foreach(println)

    for (file <- file_list) {
      var flag = true
      val date_early = MyDateUtils.getNumDaysBefore(dateStr, keepDays, pattern)
      try {
        // The last non-empty path segment is expected to be the date string.
        val file_split_strs = file.split("/")
        val len = file_split_strs.length
        var file_date = file_split_strs(len - 1)
        if (file_date.equals("")) {
          file_date = file_split_strs(len - 2)
        }
        val date1 = DateUtils.parseDate(file_date, Array[String](pattern))
        val date2 = DateUtils.parseDate(date_early, Array[String](pattern))
        // Entries dated on or after the cut-off are kept.
        if (date1.compareTo(date2) >= 0) {
          flag = false
        }
      } catch {
        case e: Exception =>
          // Entries whose names are not parsable dates are left untouched.
          flag = false
      }

      if (flag) {
        MyHdfsUtils.delete_hdfs_path(file.toString)
      }
    }
  }
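
  // Retention sketch (hypothetical table directory laid out as one date-named
  // entry per day): keep the last 7 days relative to the given run date and
  // delete everything older.
  //   hdfs_delete_not_keep_days("hdfs:///user/warehouse/my_table/", 7, "20191015")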

  /**
   * @Author: zhangbo
   * @Description: grants full (777) permissions on an HDFS path
   */
  def give_hdfs_permission(path: String): Unit = {
    getHdfs(path).setPermission(new Path(path), new FsPermission("777"))
  }
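
  // Sketch: typically called right after a job writes its output so that
  // downstream jobs running under other accounts can read (and clean up) the
  // hypothetical path `outPath`:
  //   give_hdfs_permission(outPath)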
}