|  | @@ -1,14 +1,12 @@
 | 
	
		
			
				|  |  |  #!/bin/sh
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -set -x
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  |  export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  PREDICT_HOME=/root/zhaohp/recommend-emr-dataprocess/predict
 | 
	
		
			
				|  |  | -origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto_test
 | 
	
		
			
				|  |  | -bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto_test
 | 
	
		
			
				|  |  | +origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto
 | 
	
		
			
				|  |  | +bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -25,7 +23,7 @@ delete_predict_5d_ago() {
 | 
	
		
			
				|  |  |      # 逐行读取临时文件中的路径并删除文件
 | 
	
		
			
				|  |  |      while IFS= read -r file; do
 | 
	
		
			
				|  |  |          echo "Deleting: $file"
 | 
	
		
			
				|  |  | -        # rm -f "$file"
 | 
	
		
			
				|  |  | +        rm -f "$file"
 | 
	
		
			
				|  |  |      done < "${tmp_file_name}"
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      # 删除临时文件
 | 
	
	
		
			
				|  | @@ -34,22 +32,38 @@ delete_predict_5d_ago() {
 | 
	
		
			
				|  |  |      echo "=========== 删除五天前的预测结果文件结束 $(date "+%Y-%m-%d %H:%M:%d") ==========="
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -# 删除五天之前 HDFS中的原始特征数据
 | 
	
		
			
				|  |  | -delete_hdfs_origin_data_5d_ago() {
 | 
	
		
			
				|  |  | -    FIVE_DAYS_AGO=$(date -d "5 days ago" +%Y-%m-%d)
 | 
	
		
			
				|  |  | +# 删除七天之前 HDFS中的原始特征数据
 | 
	
		
			
				|  |  | +delete_hdfs_origin_data_7d_ago() {
 | 
	
		
			
				|  |  | +    FIVE_DAYS_AGO=$(date -d "7 days ago" +%Y-%m-%d)
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      $HADOOP fs -ls $origin_data_hdfs_dir | while read line
 | 
	
		
			
				|  |  |      do
 | 
	
		
			
				|  |  | -        echo "${line}"
 | 
	
		
			
				|  |  |          dir=$(echo $line | awk '{print $8}')
 | 
	
		
			
				|  |  |          modified_date=$(echo $line | awk '{print $6}')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +        if [[ "${modified_date}" < "${FIVE_DAYS_AGO}" ]]; then
 | 
	
		
			
				|  |  | +            echo "Deleting: ${dir}"
 | 
	
		
			
				|  |  | +            $HADOOP fs -rm -r -skipTrash ${dir}
 | 
	
		
			
				|  |  | +        fi
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    done
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +# 删除七天之前 HDFS中的特征分桶数据
 | 
	
		
			
				|  |  | +delete_hdfs_bucket_feature_7d_ago() {
 | 
	
		
			
				|  |  | +    FIVE_DAYS_AGO=$(date -d "7 days ago" +%Y-%m-%d)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    $HADOOP fs -ls $bucket_feature_hdfs_dir | while read line
 | 
	
		
			
				|  |  | +    do
 | 
	
		
			
				|  |  | +        dir=$(echo $line | awk '{print $8}')
 | 
	
		
			
				|  |  | +        modified_date=$(echo $line | awk '{print $6}')
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          if [[ "${modified_date}" < "${FIVE_DAYS_AGO}" ]]; then
 | 
	
		
			
				|  |  | -            echo "dir=${dir}"
 | 
	
		
			
				|  |  | -            echo "modified_date=${modified_date}"
 | 
	
		
			
				|  |  | +            echo "Deleting: ${dir}"
 | 
	
		
			
				|  |  | +            $HADOOP fs -rm -r -skipTrash ${dir}
 | 
	
		
			
				|  |  |          fi
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      done
 | 
	
	
		
			
				|  | @@ -57,8 +71,12 @@ delete_hdfs_origin_data_5d_ago() {
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  main() {
 | 
	
		
			
				|  |  | -    # delete_predict_early_5d
 | 
	
		
			
				|  |  | -    delete_hdfs_origin_data_5d_ago
 | 
	
		
			
				|  |  | +    # 删除五天前的预测结果文件
 | 
	
		
			
				|  |  | +    delete_predict_early_5d
 | 
	
		
			
				|  |  | +    # 删除七天之前的HDFS中的特征原始数据
 | 
	
		
			
				|  |  | +    delete_hdfs_origin_data_7d_ago
 | 
	
		
			
				|  |  | +    # 删除七天之前的HDFS中的特征分桶数据
 | 
	
		
			
				|  |  | +    delete_hdfs_bucket_feature_7d_ago
 | 
	
		
			
				|  |  |  }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 |