23_delete_timer_file.sh

#!/bin/bash
export PATH=$SPARK_HOME/bin:$PATH
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0
export PREDICT_CACHE_PATH=/root/zhaohp/XGB/predict_cache/
export SEGMENT_BASE_PATH=/dw/recommend/model/36_model_attachment/score_calibration_file

HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

origin_data_hdfs_dir=/dw/recommend/model/41_recsys_origin_date
str_train_data_dir=/dw/recommend/model/41_recsys_str_train_data
ros_train_data_dir=/dw/recommend/model/41_recsys_ros_train_data
str_bucket_data_dir=/dw/recommend/model/43_recsys_str_data_bucket
ros_bucket_data_dir=/dw/recommend/model/43_recsys_ros_data_bucket

# Delete subdirectories under an HDFS path that are older than a given number of days
delete_hdfs_path() {
    if [ "$#" -ne 2 ]; then
        echo "Usage: delete_hdfs_path <days> <path>"
        return 1
    fi

    early=$1
    path=$2

    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") Start deleting files under ${path} older than ${early} days ==========="

    EARLY_DAYS_AGO=$(date -d "${early} days ago" +%Y-%m-%d)

    # hadoop fs -ls prints one entry per line:
    #   <perms> <replication> <owner> <group> <size> <date> <time> <path>
    # so $6 is the modification date and $8 is the full path.
    $HADOOP fs -ls "$path" | grep '^d' | while read -r line; do
        dir=$(echo "$line" | awk '{print $8}')
        modified_date=$(echo "$line" | awk '{print $6}')
        if [[ "${modified_date}" < "${EARLY_DAYS_AGO}" ]]; then
            echo "Deleting: ${dir}"
            # Dry run: uncomment the next line to actually delete
            # $HADOOP fs -rm -r -skipTrash ${dir}
        fi
    done

    echo "=========== $(date "+%Y-%m-%d %H:%M:%S") Finished deleting files under ${path} older than ${early} days ==========="
}
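
# Note: the string comparison above relies on YYYY-MM-DD dates sorting
# lexicographically. A minimal alternative sketch (assuming GNU date, which
# the "date -d ... days ago" usage above already implies) compares epoch
# seconds instead, which is robust to any sortable-format concerns:
#
#   cutoff=$(date -d "${early} days ago" +%s)
#   dir_ts=$(date -d "${modified_date}" +%s)
#   if [ "${dir_ts}" -lt "${cutoff}" ]; then
#       echo "Deleting: ${dir}"
#   fi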

main() {
    # Delete raw recommendation data
    delete_hdfs_path 10 $origin_data_hdfs_dir

    # Delete STR model data
    delete_hdfs_path 10 $str_train_data_dir
    delete_hdfs_path 10 $str_bucket_data_dir

    # Delete ROS model data
    delete_hdfs_path 10 $ros_train_data_dir
    delete_hdfs_path 10 $ros_bucket_data_dir
}

main
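
# The file name suggests this script is run on a timer. A sketch of a
# crontab entry that runs the cleanup daily at 02:00 (the script and log
# paths below are hypothetical, not taken from this repo):
#
#   0 2 * * * /bin/bash /path/to/23_delete_timer_file.sh >> /var/log/23_delete_timer_file.log 2>&1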