|
@@ -80,14 +80,15 @@ global_init() {
|
|
# fi
|
|
# fi
|
|
|
|
|
|
# 删除HDFS目录,保证本次任务运行时目录干净
|
|
# 删除HDFS目录,保证本次任务运行时目录干净
|
|
- $HADOOP fs -rm -r -skipTrash ${trainBucketFeaturePath}
|
|
|
|
- $HADOOP fs -rm -r -skipTrash ${predictBucketFeaturePath}
|
|
|
|
|
|
+ # $HADOOP fs -rm -r -skipTrash ${trainBucketFeaturePath}
|
|
|
|
+ # $HADOOP fs -rm -r -skipTrash ${predictBucketFeaturePath}
|
|
|
|
|
|
echo "全局变量初始化化: "
|
|
echo "全局变量初始化化: "
|
|
echo " train_begin_str=${train_begin_str}"
|
|
echo " train_begin_str=${train_begin_str}"
|
|
echo " train_end_str=${train_end_str}"
|
|
echo " train_end_str=${train_end_str}"
|
|
echo " predict_begin_str=${predict_begin_str}"
|
|
echo " predict_begin_str=${predict_begin_str}"
|
|
echo " predict_end_str=${predict_end_str}"
|
|
echo " predict_end_str=${predict_end_str}"
|
|
|
|
+ echo " originDataSavePath=${originDataSavePath}"
|
|
echo " trainBucketFeaturePath=${trainBucketFeaturePath}"
|
|
echo " trainBucketFeaturePath=${trainBucketFeaturePath}"
|
|
echo " predictBucketFeaturePath=${predictBucketFeaturePath}"
|
|
echo " predictBucketFeaturePath=${predictBucketFeaturePath}"
|
|
echo " local_model_file_path=${local_model_file_path}"
|
|
echo " local_model_file_path=${local_model_file_path}"
|
|
@@ -150,7 +151,7 @@ make_origin_data() {
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
tablePart:64 repartition:16 \
|
|
tablePart:64 repartition:16 \
|
|
beginStr:${train_begin_str} endStr:${predict_end_str} \
|
|
beginStr:${train_begin_str} endStr:${predict_end_str} \
|
|
- savePath:${trainBucketFeaturePath} \
|
|
|
|
|
|
+ savePath:${originDataSavePath} \
|
|
table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
|
|
table:alg_recsys_ad_sample_all filterHours:00,01,02,03,04,05,06,07 \
|
|
idDefaultValue:0.01
|
|
idDefaultValue:0.01
|
|
|
|
|
|
@@ -169,8 +170,8 @@ make_bucket_feature() {
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
|
|
beginStr:${train_begin_str} endStr:${train_end_str} repartition:100 \
|
|
beginStr:${train_begin_str} endStr:${train_end_str} repartition:100 \
|
|
filterNames:adid_,targeting_conversion_ \
|
|
filterNames:adid_,targeting_conversion_ \
|
|
- readPath:${trainBucketFeaturePath} \
|
|
|
|
- savePath:${predictBucketFeaturePath}
|
|
|
|
|
|
+ readPath:${originDataSavePath} \
|
|
|
|
+ savePath:${trainBucketFeaturePath}
|
|
|
|
|
|
local return_code=$?
|
|
local return_code=$?
|
|
check_run_status $return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"
|
|
check_run_status $return_code $step_start_time "Spark特征分桶任务: 训练数据分桶"
|