#!/usr/bin/env bash
# Build the HDFS data paths for the ad-ranking model:
#   - predict_date_path: yesterday's bucketed-feature directory (evaluation)
#   - train_data_path:   comma-joined directories for the 7 most recent
#                        non-holiday days, oldest first (training)
# Requires: 00_common.sh (provides is_not_holidays), GNU date.
set -x
# source /root/anaconda3/bin/activate py37

sh_path=$(dirname "$0")
# Pulls in is_not_holidays() used by init() below.
source "${sh_path}/00_common.sh"

export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
export PATH=$SPARK_HOME/bin:$PATH
export HADOOP_CONF_DIR=/etc/taihao-apps/hadoop-conf
export JAVA_HOME=/usr/lib/jvm/java-1.8.0

# Global constants
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
TRAIN_PATH=/dw/recommend/model/31_ad_sample_data_v4/
BUCKET_FEATURE_PATH=/dw/recommend/model/33_ad_train_data_v4/
TABLE=alg_recsys_ad_sample_all

# Comma-separated training data paths (filled by init)
train_data_path=""
# Evaluation data path (filled by init)
predict_date_path=""

#######################################
# Compute predict_date_path and train_data_path.
# Globals:   BUCKET_FEATURE_PATH (read),
#            train_data_path, predict_date_path (written)
# Outputs:   progress messages to stdout
#######################################
init() {
  local today_early_1
  today_early_1="$(date -d '1 days ago' +%Y%m%d)"
  predict_date_path=${BUCKET_FEATURE_PATH}/${today_early_1}

  local count=1
  local current_data
  current_data="$(date -d '2 days ago' +%Y%m%d)"

  # Walk backwards one day at a time until 7 non-holiday dates are collected.
  while [[ $count -lt 8 ]]; do
    date_key=$(date -d "$current_data" +%Y%m%d)
    # Prepend each accepted date so the final list is oldest-first.
    if [ "$(is_not_holidays "$date_key")" -eq 1 ]; then
      if [[ -z ${train_data_path} ]]; then
        train_data_path="${BUCKET_FEATURE_PATH}/${date_key}"
      else
        train_data_path="${BUCKET_FEATURE_PATH}/${date_key},${train_data_path}"
      fi
      count=$((count + 1))
    else
      echo "日期: ${date_key}是节日,跳过"
    fi
    # BUG FIX: the original read the unset variable $current_date (typo for
    # $current_data), so `date -d " -1 day"` yielded yesterday on every pass
    # and the loop never advanced through earlier days.
    current_data=$(date -d "$current_data -1 day" +%Y%m%d)
  done

  echo "train_data_path: ${train_data_path}"
  echo "predict_date_path: ${predict_date_path}"
}

# xgb_train() {
# }

# Entry point
main() {
  init
}

main