|
@@ -20,7 +20,7 @@ bucketDataPath=/dw/recommend/model/16_train_data/
|
|
# shellcheck disable=SC2039
|
|
# shellcheck disable=SC2039
|
|
source /root/anaconda3/bin/activate py37
|
|
source /root/anaconda3/bin/activate py37
|
|
# shellcheck disable=SC2154
|
|
# shellcheck disable=SC2154
|
|
-echo "----------step1------------开始校验是否生产完数据,分区信息:beginStr:${begin_early_2_Str}${beginHhStr},endStr:${end_early_2_Str}${endHhStr}"
|
|
|
|
|
|
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step1------------开始校验是否生产完数据,分区信息:beginStr:${begin_early_2_Str}${beginHhStr},endStr:${end_early_2_Str}${endHhStr}"
|
|
while true; do
|
|
while true; do
|
|
python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --beginStr ${begin_early_2_Str}${beginHhStr} --endStr ${end_early_2_Str}${endHhStr})
|
|
python_return_code=$(python /root/joe/recommend-emr-dataprocess/qiaojialiang/checkHiveDataUtil.py --table ${table} --beginStr ${begin_early_2_Str}${beginHhStr} --endStr ${end_early_2_Str}${endHhStr})
|
|
echo "python 返回值:${python_return_code}"
|
|
echo "python 返回值:${python_return_code}"
|
|
@@ -40,7 +40,7 @@ while true; do
|
|
done
|
|
done
|
|
|
|
|
|
# 1 生产原始数据
|
|
# 1 生产原始数据
|
|
-echo "----------step2------------开始根据${table}生产原始数据"
|
|
|
|
|
|
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step2------------开始根据${table}生产原始数据"
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_13_originData_20240705 \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_13_originData_20240705 \
|
|
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
|
|
@@ -58,7 +58,7 @@ fi
|
|
|
|
|
|
|
|
|
|
# 2 特征值拼接
|
|
# 2 特征值拼接
|
|
-echo "----------step3------------开始特征值拼接"
|
|
|
|
|
|
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step3------------开始特征值拼接"
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_14_valueData_20240705 \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_14_valueData_20240705 \
|
|
--master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
|
|
--master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
|
|
@@ -74,7 +74,7 @@ else
|
|
fi
|
|
fi
|
|
|
|
|
|
# 3 特征分桶
|
|
# 3 特征分桶
|
|
-echo "----------step4------------根据特征分桶生产重打分特征数据"
|
|
|
|
|
|
+echo "$(date +%Y-%m-%d_%H-%M-%S)----------step4------------根据特征分桶生产重打分特征数据"
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_16_bucketData_20240705 \
|
|
--class com.aliyun.odps.spark.examples.makedata_qiao.makedata_16_bucketData_20240705 \
|
|
--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
|
|
--master yarn --driver-memory 2G --executor-memory 4G --executor-cores 1 --num-executors 16 \
|