12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576 |
#!/bin/bash
# Run under bash rather than plain sh: the functions in this file use
# non-POSIX shell features.

# Exported so the hadoop client started through $HADOOP sees it in its
# environment.
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf

# Executable invoked as "$HADOOP fs ..." for all HDFS operations.
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

# Local directory scanned by delete_predict_5d_ago for old result files.
PREDICT_HOME=/root/zhaohp/recommend-emr-dataprocess/predict

# HDFS directories handed to delete_hdfs_path by main: raw feature data and
# bucketed feature data respectively.
origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto
bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto
#######################################
# Delete old prediction result files below PREDICT_HOME.
#
# A file is selected when it matches `find -type f -mtime +5`, i.e. a regular
# file whose modification time is more than five whole 24-hour periods ago.
#
# Globals:   PREDICT_HOME (read) - root directory to scan
# Arguments: none
# Outputs:   start/end banners and one "Deleting: <file>" line per removed
#            file on stdout; diagnostics on stderr
# Returns:   1 if PREDICT_HOME is unset, empty or not a directory; otherwise
#            the exit status of find (0 on success)
#######################################
delete_predict_5d_ago() {
  # Refuse to run find on an empty or missing root instead of letting it fail
  # halfway through with a confusing message.
  if [ -z "${PREDICT_HOME:-}" ] || [ ! -d "${PREDICT_HOME}" ]; then
    echo "delete_predict_5d_ago: PREDICT_HOME is not a directory: '${PREDICT_HOME:-}'" >&2
    return 1
  fi

  echo "=========== 开始删除五天前的预测结果文件 $(date "+%Y-%m-%d %H:%M:%S") ==========="

  local status=0
  # Let find pass the matches straight to a small inline script. This avoids
  # a scratch file in the current directory and keeps every path intact as a
  # single argument, whatever characters it contains.
  find "${PREDICT_HOME}" -type f -mtime +5 -exec sh -c '
    for file in "$@"; do
      printf "Deleting: %s\n" "$file"
      rm -f -- "$file"
    done
  ' sh {} + || status=$?

  echo "=========== 删除五天前的预测结果文件结束 $(date "+%Y-%m-%d %H:%M:%S") ==========="
  return "${status}"
}
#######################################
# Report the entries directly under an HDFS directory whose modification date
# is older than a given number of days.
#
# The listing comes from "$HADOOP fs -ls <path>"; field 6 of each entry line
# is taken as the modification date (YYYY-MM-DD) and field 8 as the entry
# path. An entry is selected when its date sorts strictly before the date
# <early> days ago.
#
# NOTE: this is currently a dry run. The actual "fs -rm" call is commented
# out, so selected entries are only printed.
#
# Globals:   HADOOP (read) - hadoop command to invoke
# Arguments: $1 - early: age threshold in days (non-negative integer)
#            $2 - path:  HDFS directory to inspect
# Outputs:   start/end banners and one "Deleting: <entry>" line per selected
#            entry on stdout; usage and errors on stderr
# Returns:   0 on success; 1 on bad arguments, if the cutoff date cannot be
#            computed, or if the HDFS listing fails
#######################################
delete_hdfs_path() {
  if [ "$#" -ne 2 ]; then
    echo "Usage: delete_hdfs_path <early> <path>" >&2
    return 1
  fi

  local early=$1
  local path=$2
  local cutoff_date listing

  # Reject anything that is not a plain number before handing it to date.
  case "${early}" in
    '' | *[!0-9]*)
      echo "delete_hdfs_path: <early> must be a non-negative integer, got '${early}'" >&2
      return 1
      ;;
  esac
  if [ -z "${path}" ]; then
    echo "delete_hdfs_path: <path> must not be empty" >&2
    return 1
  fi

  echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 开始删除目录 ${path} ${early}天前的文件 ==========="

  # ISO dates compare correctly as plain strings, so the cutoff is kept in the
  # same YYYY-MM-DD form that the listing uses.
  if ! cutoff_date=$(date -d "${early} days ago" +%Y-%m-%d); then
    echo "delete_hdfs_path: cannot compute the date ${early} days ago" >&2
    return 1
  fi

  # Capture the listing first so a failing hadoop call is detected instead of
  # being hidden behind a pipeline.
  if ! listing=$("${HADOOP}" fs -ls "${path}"); then
    echo "delete_hdfs_path: failed to list ${path}" >&2
    return 1
  fi

  # Only lines with at least 8 fields are real entries; this skips summary
  # lines such as "Found N items". Appending "" forces a string comparison.
  printf '%s\n' "${listing}" |
    awk -v cutoff="${cutoff_date}" \
      'NF >= 8 && ($6 "") < (cutoff "") { print $8 }' |
    while IFS= read -r dir; do
      echo "Deleting: ${dir}"
      # "${HADOOP}" fs -rm -r -skipTrash "${dir}"
    done

  echo "=========== $(date "+%Y-%m-%d %H:%M:%S") 删除目录 ${path} ${early}天前的文件结束 ==========="
}
#######################################
# Entry point: runs the cleanup steps that are currently enabled.
# Globals:   bucket_feature_hdfs_dir (read)
# Arguments: none used; any script arguments are accepted and ignored
#######################################
main() {
  # Disabled: delete prediction result files older than five days.
  # delete_predict_5d_ago

  # Disabled: delete raw feature data older than seven days from HDFS.
  # delete_hdfs_path 7 "${origin_data_hdfs_dir}"

  # Delete bucketed feature data older than seven days from HDFS.
  # Quoted so the path always reaches the callee as exactly one argument.
  delete_hdfs_path 7 "${bucket_feature_hdfs_dir}"
}

main "$@"
|