#!/bin/bash
# Retention cleanup for recommend-model HDFS directories: removes per-day
# subdirectories older than the retention window (see main below).
# NOTE(fix): shebang changed from /bin/sh to bash — delete_hdfs_path uses
# the bash-only [[ ]] conditional.

export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

# HDFS data roots, grouped by model: raw origin data, STR train/bucket
# data, ROS train/bucket data.
origin_data_hdfs_dir=/dw/recommend/model/41_recsys_origin_date
str_train_data_dir=/dw/recommend/model/41_recsys_str_train_data
ros_train_data_dir=/dw/recommend/model/41_recsys_ros_train_data
str_bucket_data_dir=/dw/recommend/model/43_recsys_str_data_bucket
ros_bucket_data_dir=/dw/recommend/model/43_recsys_ros_data_bucket
#######################################
# Delete HDFS subdirectories older than a retention window.
# Lists the children of an HDFS path and logs (dry-run: the actual rm is
# commented out) every directory whose modification date is older than
# <early> days ago.
# Globals:   HADOOP (read) - path to the hadoop binary
# Arguments: $1 - early: retention window in days
#            $2 - path:  HDFS directory whose children are examined
# Outputs:   progress log to stdout
# Returns:   1 on wrong argument count, otherwise 0
#######################################
delete_hdfs_path() {
    if [ "$#" -ne 2 ]; then
        echo "Usage: delete_path <early> <path>"
        return 1
    fi
    local early=$1
    local path=$2
    # BUG FIX: format was "%H:%M:%d" — %d is day-of-month, not seconds.
    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 开始删除目录 ${path}下 ${early}天前的文件 ==========="
    # Cutoff in zero-padded ISO format, matching the date column that
    # `hadoop fs -ls` prints. NOTE: `date -d` is GNU-only.
    local cutoff
    cutoff=$(date -d "${early} days ago" +%Y-%m-%d)
    # Column 6 of `hadoop fs -ls` output is the modification date,
    # column 8 the path; '^d' keeps only directories.
    $HADOOP fs -ls "$path" | grep '^d' | while read -r line; do
        dir=$(echo "$line" | awk '{print $8}')
        modified_date=$(echo "$line" | awk '{print $6}')
        echo "${line}"
        # Lexicographic < is a valid date compare for zero-padded ISO dates.
        if [[ "${modified_date}" < "${cutoff}" ]]; then
            echo "Deleting: ${dir}"
            # Intentionally disabled — dry-run only. Re-enable to delete:
            # $HADOOP fs -rm -r -skipTrash "${dir}"
        fi
    done
    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 删除目录 ${path}下 ${early}天前的文件结束 ==========="
}
# Run the retention cleanup over every configured HDFS data root:
# raw origin data, then STR train/bucket data, then ROS train/bucket
# data. Each keeps only subdirectories newer than 10 days.
main() {
    local keep_days=10
    local data_dir
    for data_dir in \
        "$origin_data_hdfs_dir" \
        "$str_train_data_dir" \
        "$str_bucket_data_dir" \
        "$ros_train_data_dir" \
        "$ros_bucket_data_dir"; do
        delete_hdfs_path "$keep_days" "$data_dir"
    done
}
- main