@@ -1,14 +1,12 @@
 #!/bin/sh

-set -x
-
 export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf

 HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
 PREDICT_HOME=/root/zhaohp/recommend-emr-dataprocess/predict

-origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto_test
-bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto_test
+origin_data_hdfs_dir=/dw/recommend/model/31_ad_sample_data_v3_auto
+bucket_feature_hdfs_dir=/dw/recommend/model/33_ad_train_data_v3_auto
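This hunk drops `set -x` and repoints the two HDFS variables from the `_test` directories to the production `_auto` paths. Since the cleanup below deletes with `-skipTrash`, a pre-flight existence check on the repointed paths is cheap insurance. A minimal sketch, relying on the standard `hadoop fs -test -d` exit code; the guard is illustrative and not part of this change:

    # Abort early if either target directory is missing, so a typo in the
    # repointed paths cannot send the cleanup after the wrong tree.
    for d in "$origin_data_hdfs_dir" "$bucket_feature_hdfs_dir"; do
        if ! $HADOOP fs -test -d "$d"; then
            echo "Missing HDFS dir: $d" >&2
            exit 1
        fi
    done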
@@ -25,7 +23,7 @@ delete_predict_5d_ago() {
     # Read each path from the temp file line by line and delete it
     while IFS= read -r file; do
         echo "Deleting: $file"
-        # rm -f "$file"
+        rm -f "$file"
     done < "${tmp_file_name}"

     # Remove the temp file
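With the `rm -f` uncommented, this loop now really deletes every path listed in `${tmp_file_name}`. If that file can ever contain blank or unexpected lines, a stricter loop body is a common hardening step. A sketch, not part of this change; the `PREDICT_HOME` prefix check is an assumption about where the prediction files live:

    # Hypothetical hardened loop: skip blank lines and only delete
    # files under PREDICT_HOME; report anything else instead.
    while IFS= read -r file; do
        [ -z "$file" ] && continue
        case "$file" in
            "$PREDICT_HOME"/*) echo "Deleting: $file"; rm -f -- "$file" ;;
            *) echo "Skipping unexpected path: $file" >&2 ;;
        esac
    done < "${tmp_file_name}"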
@@ -34,22 +32,38 @@ delete_predict_5d_ago() {
     echo "=========== Finished deleting prediction result files older than 5 days $(date "+%Y-%m-%d %H:%M:%S") ==========="
 }

-# Delete raw feature data in HDFS older than 5 days
-delete_hdfs_origin_data_5d_ago() {
-    FIVE_DAYS_AGO=$(date -d "5 days ago" +%Y-%m-%d)
+# Delete raw feature data in HDFS older than 7 days
+delete_hdfs_origin_data_7d_ago() {
+    SEVEN_DAYS_AGO=$(date -d "7 days ago" +%Y-%m-%d)

     $HADOOP fs -ls $origin_data_hdfs_dir | while read line
     do
-        echo "${line}"
         dir=$(echo $line | awk '{print $8}')
         modified_date=$(echo $line | awk '{print $6}')

+        if [[ "${modified_date}" < "${SEVEN_DAYS_AGO}" ]]; then
+            echo "Deleting: ${dir}"
+            $HADOOP fs -rm -r -skipTrash "${dir}"
+        fi
+
+    done
+
+}
+
+# Delete bucketed feature data in HDFS older than 7 days
+delete_hdfs_bucket_feature_7d_ago() {
+    SEVEN_DAYS_AGO=$(date -d "7 days ago" +%Y-%m-%d)
+
+    $HADOOP fs -ls $bucket_feature_hdfs_dir | while read line
+    do
+        dir=$(echo $line | awk '{print $8}')
+        modified_date=$(echo $line | awk '{print $6}')

-        if [[ "${modified_date}" < "${FIVE_DAYS_AGO}" ]]; then
-            echo "dir=${dir}"
-            echo "modified_date=${modified_date}"
+        if [[ "${modified_date}" < "${SEVEN_DAYS_AGO}" ]]; then
+            echo "Deleting: ${dir}"
+            $HADOOP fs -rm -r -skipTrash "${dir}"
         fi

     done
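Both functions parse `hadoop fs -ls` output positionally: field 6 is the modification date (YYYY-MM-DD) and field 8 the path, so the lexicographic `<` inside `[[ ]]` compares dates correctly. Two caveats: `[[ ]]` is a bash-ism, so the `#!/bin/sh` shebang only works on hosts where sh is bash; and `hadoop fs -ls` prints a leading "Found N items" line on which both fields are empty, and an empty string sorts before any date, which would feed an empty path to `-rm`. A guarded loop body, as a sketch (the `-n` test is an addition, not in this change):

    # Skip the "Found N items" header and malformed lines before the
    # date comparison, so -rm -skipTrash never sees an empty path.
    if [[ -n "${dir}" && "${modified_date}" < "${SEVEN_DAYS_AGO}" ]]; then
        echo "Deleting: ${dir}"
        $HADOOP fs -rm -r -skipTrash "${dir}"
    fi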
@@ -57,8 +71,12 @@ delete_hdfs_origin_data_5d_ago() {
 }

 main() {
-    # delete_predict_early_5d
-    delete_hdfs_origin_data_5d_ago
+    # Delete prediction result files older than 5 days
+    delete_predict_early_5d
+    # Delete raw feature data in HDFS older than 7 days
+    delete_hdfs_origin_data_7d_ago
+    # Delete bucketed feature data in HDFS older than 7 days
+    delete_hdfs_bucket_feature_7d_ago
 }
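main() now runs all three retention steps unconditionally. One naming detail worth double-checking: the earlier hunk headers show the prediction cleanup function as `delete_predict_5d_ago`, while main calls `delete_predict_early_5d`; if those refer to the same function, the call and the definition need to agree. For scheduling, a hypothetical crontab entry (the time, script name, and log path are illustrative, not from this change):

    # Run the retention cleanup daily at 02:30, appending output to a log.
    30 2 * * * /bin/sh /root/zhaohp/recommend-emr-dataprocess/cleanup.sh >> /var/log/recommend_cleanup.log 2>&1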