#!/bin/bash

export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf

HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop

PREDICT_HOME=/root/zhaohp/recommend-emr-dataprocess/predict
origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto
bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto


# Delete prediction result files older than five days
delete_predict_5d_ago() {

    echo "=========== 开始删除五天前的预测结果文件 $(date "+%Y-%m-%d %H:%M:%d") ==========="

    tmp_file_name=$(mktemp)

    # Find prediction result files older than five days and write their paths to a temp file
    find "$PREDICT_HOME" -type f -mtime +5 > "${tmp_file_name}"

    # Read the temp file line by line and delete each listed file
    while IFS= read -r file; do
        echo "Deleting: $file"
        rm -f "$file"
    done < "${tmp_file_name}"

    # Remove the temp file
    rm -f "${tmp_file_name}"

    echo "=========== 删除五天前的预测结果文件结束 $(date "+%Y-%m-%d %H:%M:%d") ==========="
}

# Delete subdirectories under an HDFS path that are older than the given number of days
delete_hdfs_path() {
    if [ "$#" -ne 2 ]; then
        echo "Usage: delete_path <early> <path>"
        return 1
    fi

    early=$1
    path=$2

    echo "=========== $(date "+%Y-%m-%d %H:%M:%d") 开始删除目录 ${path}下 ${early}天前的文件  ==========="

    EARLY_DAYS_AGO=$(date -d "${early} days ago" +%Y-%m-%d)
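
    # The "date -d" relative syntax above requires GNU coreutils date.
    # Each directory line printed by "hadoop fs -ls" looks like (illustrative values):
    #   drwxr-xr-x   - hdfs supergroup          0 2024-05-01 12:34 /dw/recommend/model/xxx
    # so awk field 6 is the modification date and field 8 is the path. Since both
    # dates use YYYY-MM-DD form, the string comparison below orders them chronologically.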

    $HADOOP fs -ls "$path" | grep '^d' | while read -r line; do
        dir=$(echo "$line" | awk '{print $8}')
        modified_date=$(echo "$line" | awk '{print $6}')
        echo "${line}"
        if [[ "${modified_date}" < "${EARLY_DAYS_AGO}" ]]; then
            echo "Deleting: ${dir}"
            $HADOOP fs -rm -r -skipTrash "${dir}"
        fi

    done

    echo "=========== $(date "+%Y-%m-%d %H:%M:%d") 删除目录 ${path}下 ${early}天前的文件结束  ==========="

}
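
# Example (hypothetical path): delete_hdfs_path 3 /dw/recommend/model/some_dir
# would remove every subdirectory of some_dir whose HDFS modification date is
# more than three days old.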


main() {
    # Delete prediction result files older than five days
    delete_predict_5d_ago
    # Delete raw feature data in HDFS older than seven days
    delete_hdfs_path 7 "$origin_data_hdfs_dir"
    # Delete bucketed feature data in HDFS older than seven days
    delete_hdfs_path 7 "$bucket_feature_hdfs_dir"
}


main