#!/bin/bash
#
# Retention cleanup for recommendation-model data stored in HDFS.
#
# For each configured HDFS directory, lists its immediate sub-directories and
# selects those whose modification date is older than the retention period.
#
# Usage:
#   script [retention_days]      # retention_days defaults to 10
#
# Environment:
#   DRY_RUN  1 (default): only log the directories that would be removed.
#            0          : actually run `hadoop fs -rm -r -skipTrash` on them.
#
# NOTE: bash is required ([[ ]] string comparison, here-strings, `local`).

export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

# Deletion stays disabled unless explicitly requested with DRY_RUN=0.
DRY_RUN=${DRY_RUN:-1}

origin_data_hdfs_dir=/dw/recommend/model/41_recsys_origin_date
str_train_data_dir=/dw/recommend/model/41_recsys_str_train_data
ros_train_data_dir=/dw/recommend/model/41_recsys_ros_train_data
str_bucket_data_dir=/dw/recommend/model/43_recsys_str_data_bucket
ros_bucket_data_dir=/dw/recommend/model/43_recsys_ros_data_bucket

#######################################
# Delete sub-directories of an HDFS path that are older than N days.
# Globals:
#   HADOOP  (read) command used to talk to HDFS
#   DRY_RUN (read) anything other than "0" means "log only, do not delete"
# Arguments:
#   $1 - age threshold in days (non-negative integer)
#   $2 - HDFS directory whose immediate sub-directories are inspected
# Outputs:
#   Progress banners, every directory entry seen, and a "Deleting: <dir>"
#   line per selected directory on stdout; diagnostics on stderr.
# Returns:
#   0 on success; 1 on bad arguments, a failed listing, or a failed removal.
#######################################
delete_hdfs_path() {
  if [ "$#" -ne 2 ]; then
    echo "Usage: delete_hdfs_path <early_days> <hdfs_path>" >&2
    return 1
  fi

  local early=$1
  local path=$2
  local -r date_re='^[0-9]{4}-[0-9]{2}-[0-9]{2}$'
  local cutoff listing line modified_date dir
  local status=0

  if ! [[ "${early}" =~ ^[0-9]+$ ]]; then
    echo "delete_hdfs_path: <early_days> must be a non-negative integer, got '${early}'" >&2
    return 1
  fi

  echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 开始删除目录 ${path}下 ${early}天前的文件 ==========="

  # ISO dates (YYYY-MM-DD) order correctly under plain string comparison.
  if ! cutoff=$(date -d "${early} days ago" +%Y-%m-%d); then
    echo "delete_hdfs_path: cannot compute cutoff date for '${early}' days" >&2
    return 1
  fi

  # Capture the listing first so a failing `fs -ls` is detected instead of
  # being hidden behind a pipeline.
  if ! listing=$("${HADOOP}" fs -ls "${path}"); then
    echo "delete_hdfs_path: failed to list ${path}" >&2
    return 1
  fi

  while IFS= read -r line; do
    # Only directory entries (permission string starts with 'd').
    [[ "${line}" == d* ]] || continue

    # Expected columns: perms, replication, owner, group, size, date, time,
    # path. The last variable receives the rest of the line, so paths that
    # contain spaces stay intact.
    read -r _ _ _ _ _ modified_date _ dir <<<"${line}"
    echo "${line}"

    # Never act on an entry that could not be parsed reliably.
    if ! [[ "${modified_date}" =~ ${date_re} ]] || [[ -z "${dir}" ]]; then
      echo "delete_hdfs_path: skipping unparsable entry: ${line}" >&2
      continue
    fi

    if [[ "${modified_date}" < "${cutoff}" ]]; then
      echo "Deleting: ${dir}"
      if [[ "${DRY_RUN:-1}" == "0" ]]; then
        if ! "${HADOOP}" fs -rm -r -skipTrash "${dir}"; then
          echo "delete_hdfs_path: failed to delete ${dir}" >&2
          status=1
        fi
      fi
    fi
  done <<<"${listing}"

  echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 删除目录 ${path}下 ${early}天前的文件结束 ==========="
  return "${status}"
}

#######################################
# Run the cleanup over every configured data directory.
# Arguments:
#   $1 - optional retention period in days (default: 10)
# Returns:
#   0 if every directory was processed successfully, 1 otherwise. A failure
#   in one directory does not prevent the remaining ones from being handled.
#######################################
main() {
  local days=${1:-10}
  local status=0

  # Raw recommendation data.
  delete_hdfs_path "${days}" "${origin_data_hdfs_dir}" || status=1

  # STR model data.
  delete_hdfs_path "${days}" "${str_train_data_dir}" || status=1
  delete_hdfs_path "${days}" "${str_bucket_data_dir}" || status=1

  # ROS model data.
  delete_hdfs_path "${days}" "${ros_train_data_dir}" || status=1
  delete_hdfs_path "${days}" "${ros_bucket_data_dir}" || status=1

  return "${status}"
}

main "$@"