03_delete_timer_file.sh

#!/bin/bash
set -x

export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf

HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
PREDICT_HOME=/root/zhaohp/recommend-emr-dataprocess/predict
origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto_test
bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto_test

# Delete local prediction result files older than five days
delete_predict_5d_ago() {
    echo "=========== Start deleting prediction results older than five days $(date "+%Y-%m-%d %H:%M:%S") ==========="

    tmp_file_name=./files_to_delete.txt

    # Find prediction result files older than five days and save the list to a temporary file
    find "$PREDICT_HOME" -type f -mtime +5 > "${tmp_file_name}"

    # Read the temporary file line by line and delete each file
    # (the actual rm is commented out, so this is currently a dry run)
    while IFS= read -r file; do
        echo "Deleting: $file"
        # rm -f "$file"
    done < "${tmp_file_name}"

    # Remove the temporary file
    rm -f "${tmp_file_name}"

    echo "=========== Finished deleting prediction results older than five days $(date "+%Y-%m-%d %H:%M:%S") ==========="
}

# Delete raw feature data in HDFS older than five days
delete_hdfs_origin_data_5d_ago() {
    FIVE_DAYS_AGO=$(date -d "5 days ago" +%Y-%m-%d)

    # "hadoop fs -ls" prints the modification date in column 6 and the path in
    # column 8; entries older than the cutoff are only logged here, not deleted
    $HADOOP fs -ls $origin_data_hdfs_dir | while read line
    do
        echo "${line}"
        dir=$(echo $line | awk '{print $8}')
        modified_date=$(echo $line | awk '{print $6}')
        if [[ "${modified_date}" < "${FIVE_DAYS_AGO}" ]]; then
            echo "dir=${dir}"
            echo "modified_date=${modified_date}"
        fi
    done
}

main() {
    # delete_predict_5d_ago
    delete_hdfs_origin_data_5d_ago
}

main
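
As written, delete_hdfs_origin_data_5d_ago only logs the stale directories. A minimal sketch of the deletion step, assuming the standard Hadoop CLI; the guard skips the "Found N items" header line that "hadoop fs -ls" prints (where column 8 is empty), and -skipTrash bypasses the HDFS trash, so this should stay commented out until the logged output has been verified:

    if [[ "${modified_date}" < "${FIVE_DAYS_AGO}" ]]; then
        # Skip the "Found N items" header line, where $8 is empty
        if [ -n "${dir}" ]; then
            echo "Would delete: ${dir}"
            # $HADOOP fs -rm -r -skipTrash "${dir}"
        fi
    fi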
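
The temporary file in delete_predict_5d_ago can also be avoided entirely on systems with GNU findutils, where -delete removes each match and -print keeps the same per-file logging; a sketch, assuming GNU find is available on the host:

    # One pass: log and delete files older than five days, no temp file needed
    find "$PREDICT_HOME" -type f -mtime +5 -print -delete

Note that -delete implies -depth, which is harmless here since only regular files are matched.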