#!/bin/bash
#
# Prepares the training / evaluation data paths for the ad model pipeline.
# Uses bash features ([[ ]], local, source), so it must run under bash,
# not a plain POSIX sh.

# Trace every command as it runs.
set -x

# source /root/anaconda3/bin/activate py37

# Directory containing this script; used to locate the shared helper file.
sh_path=$(dirname -- "$0")
# Shared helpers (init() calls is_not_holidays, presumably defined there --
# TODO confirm).
source "${sh_path}/00_common.sh"

# Spark / Hadoop / Java runtime locations.
export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
export PATH=$SPARK_HOME/bin:$PATH
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0

# Global constants.
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
TRAIN_PATH=/dw/recommend/model/31_ad_sample_data_v4/
BUCKET_FEATURE_PATH=/dw/recommend/model/33_ad_train_data_v4/
TABLE=alg_recsys_ad_sample_all

# Data path(s) used for training; filled in by init() as a comma-separated list.
train_data_path=""
# Data path used for evaluation; filled in by init().
predict_date_path=""
#######################################
# Compute the evaluation path and the list of training paths.
#
# The evaluation path is BUCKET_FEATURE_PATH/<yesterday>. The training list
# is built by walking backwards one day at a time from the day before
# yesterday, keeping the 7 most recent dates for which is_not_holidays
# prints 1, and joining them with commas (oldest first).
#
# Globals:
#   BUCKET_FEATURE_PATH (read)
#   train_data_path     (appended to)
#   predict_date_path   (written)
# Outputs:
#   Progress and the resulting paths on stdout; errors on stderr.
# Returns:
#   0 on success, 1 if `date` fails or not enough non-holiday dates are
#   found within the look-back limit.
#######################################
init() {
  local -r wanted_days=7    # number of non-holiday dates to collect
  local -r max_lookback=60  # safety cap so a misbehaving helper cannot spin forever

  # Declared separately from the assignment so a failing `date` is not masked.
  local today_early_1
  today_early_1="$(date -d '1 days ago' +%Y%m%d)" || return 1
  predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}

  local offset=1     # days before today_early_1; advances on EVERY iteration
  local collected=0  # non-holiday dates gathered so far
  local date_key not_holiday

  # Collect the previous non-holiday dates. The day offset must advance even
  # when a date is skipped; otherwise the same holiday is re-checked forever.
  while ((collected < wanted_days)); do
    if ((offset > max_lookback)); then
      echo "init: fewer than ${wanted_days} non-holiday dates within ${max_lookback} days" >&2
      return 1
    fi

    date_key=$(date -d "${today_early_1} -${offset} day" +%Y%m%d) || return 1
    offset=$((offset + 1))

    # Empty or non-numeric helper output counts as "holiday" instead of
    # raising a test error.
    not_holiday=$(is_not_holidays "${date_key}")
    if [[ "${not_holiday}" -eq 1 ]]; then
      # Prepend, so the final list runs from the oldest to the newest date.
      if [[ -z "${train_data_path}" ]]; then
        train_data_path="${BUCKET_FEATURE_PATH}/${date_key}"
      else
        train_data_path="${BUCKET_FEATURE_PATH}/${date_key},${train_data_path}"
      fi
      collected=$((collected + 1))
    else
      echo "日期: ${date_key}是节日,跳过"
    fi
  done

  echo "train_data_path: ${train_data_path}"
  echo "predict_date_path: ${predict_date_path}"
}
# xgb_train() {
# }
# Entry point: resolves the training and evaluation data paths via init.
# Its exit status is that of init.
main() {
  init
}

main "$@"