30_delete_timer_file.sh

#!/bin/bash

export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

origin_data_hdfs_dir=/dw/recommend/model/41_recsys_origin_date
str_train_data_dir=/dw/recommend/model/41_recsys_str_train_data
ros_train_data_dir=/dw/recommend/model/41_recsys_ros_train_data
str_bucket_data_dir=/dw/recommend/model/43_recsys_str_data_bucket
ros_bucket_data_dir=/dw/recommend/model/43_recsys_ros_data_bucket

# Delete subdirectories of an HDFS path whose modification date is older
# than the given number of days.
delete_hdfs_path() {
    if [ "$#" -ne 2 ]; then
        echo "Usage: delete_hdfs_path <early> <path>"
        return 1
    fi

    early=$1
    path=$2

    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") Start deleting files under ${path} older than ${early} days ==========="

    EARLY_DAYS_AGO=$(date -d "${early} days ago" +%Y-%m-%d)

    # Keep only directory entries ('^d') and compare each one's
    # modification date against the cutoff.
    $HADOOP fs -ls $path | grep '^d' | while read line; do
        dir=$(echo $line | awk '{print $8}')
        modified_date=$(echo $line | awk '{print $6}')
        echo "${line}"
        if [[ "${modified_date}" < "${EARLY_DAYS_AGO}" ]]; then
            echo "Deleting: ${dir}"
            # Actual deletion is disabled; uncomment to delete for real.
            # $HADOOP fs -rm -r -skipTrash ${dir}
        fi
    done

    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") Finished deleting files under ${path} older than ${early} days ==========="
}
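
# Note on the parsing above: each directory line printed by `hadoop fs -ls`
# has the form
#   permissions  replicas  owner  group  size  date  time  path
# e.g. (illustrative):
#   drwxr-xr-x   - user group 0 2024-01-15 03:00 /dw/recommend/model/41_recsys_origin_date/20240115
# so awk field $6 is the modification date and $8 is the full path. The
# string test [[ "${modified_date}" < "${EARLY_DAYS_AGO}" ]] works because
# yyyy-mm-dd dates sort lexicographically in chronological order.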
main() {
    # Delete raw recommendation data
    delete_hdfs_path 10 $origin_data_hdfs_dir

    # Delete STR model data
    delete_hdfs_path 10 $str_train_data_dir
    delete_hdfs_path 10 $str_bucket_data_dir

    # Delete ROS model data
    delete_hdfs_path 10 $ros_train_data_dir
    delete_hdfs_path 10 $ros_bucket_data_dir
}

main
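
As written, the script is a dry run: it only logs the directories it would remove, and the `hadoop fs -rm -r -skipTrash` line must be uncommented to perform the actual deletion. The filename suggests the script runs unattended on a schedule; below is a minimal crontab sketch, assuming a hypothetical install path and log location (adjust both to the actual deployment):

    # Hypothetical crontab entry: run the cleanup daily at 02:00 and append
    # stdout/stderr to a log file.
    0 2 * * * /bin/bash /opt/apps/recsys/30_delete_timer_file.sh >> /var/log/recsys/30_delete_timer_file.log 2>&1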