|  | @@ -0,0 +1,66 @@
 | 
	
		
			
				|  |  | +#!/bin/sh
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Train an FM model day by day, predict with it, and compute AUC.
				#
				# Positional arguments:
				#   $1  begin_date  - first training date, YYYYMMDD
				#   $2  end_date    - last prediction date, YYYYMMDD
				#   $3  model_name  - prefix of the model and prediction file names
				#   $4  dim         - passed verbatim to fm_train / fm_predict as -dim

				# Uncomment to trace every command while debugging.
				# set -x

				begin_date=${1:-}
				end_date=${2:-}
				model_name=${3:-}
				dim=${4:-}

				# Fixed locations; readonly so that no later code can repoint them by accident.
				readonly PROJECT_HOME=/root/zhaohp/recommend-emr-dataprocess
				readonly HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
				readonly HDFS_TRAIN_DATE_PATH=/dw/recommend/model/33_ad_train_data_v4
				readonly MODEL_PATH="${PROJECT_HOME}/model"
				readonly PREDICT_PATH="${PROJECT_HOME}/predict"

				readonly FM_TRAIN=/root/sunmingze/alphaFM/bin/fm_train
				readonly FM_PREDICT=/root/sunmingze/alphaFM/bin/fm_predict

				# Date of the model currently being trained; advanced one day at a time.
				train_date=$begin_date
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Evaluate the model trained on $train_date against every later day.
				#
				# For each date from the day after $train_date through $end_date
				# (inclusive) this streams that day's HDFS data into fm_predict, feeds the
				# resulting prediction file to the AUC tool and prints one summary line.
				# The prediction file ${PREDICT_PATH}/${model_name}_${train_date}.txt is
				# overwritten on every iteration.
				#
				# Globals read: train_date end_date model_name dim HADOOP FM_PREDICT
				#               HDFS_TRAIN_DATE_PATH MODEL_PATH PREDICT_PATH
				#               AUC_BIN (optional; defaults to /root/sunmingze/AUC/AUC)
				# Outputs:      banners and one result line per date on stdout,
				#               error messages on stderr
				# Returns:      0 if every date was scored; 1 if a date could not be
				#               computed or fm_predict failed for at least one date
				predict_auc() {
				    local model_file predict_file auc_bin
				    local predict_date predict_end_date auc status

				    model_file="${MODEL_PATH}/${model_name}_${train_date}.txt"
				    predict_file="${PREDICT_PATH}/${model_name}_${train_date}.txt"
				    auc_bin=${AUC_BIN:-/root/sunmingze/AUC/AUC}
				    status=0

				    printf '\t==================== 开始预测 %s 模型 ====================\n' "$train_date"

				    # Bail out when date arithmetic fails: an empty date would otherwise
				    # never satisfy the loop's exit condition.
				    predict_date=$(date -d "$train_date +1 day" +%Y%m%d) || return 1
				    predict_end_date=$(date -d "$end_date +1 day" +%Y%m%d) || return 1

				    # YYYYMMDD values order numerically, so -lt also terminates when the
				    # start is already past the end (a plain != test would spin forever).
				    while [ "$predict_date" -lt "$predict_end_date" ]; do
				        # The glob is quoted so that hadoop, not the local shell, expands it.
				        if "$HADOOP" fs -text "${HDFS_TRAIN_DATE_PATH}/${predict_date}/*" \
				            | "$FM_PREDICT" -m "$model_file" -dim "$dim" -core 8 -out "$predict_file"; then
				            auc=$("$auc_bin" < "$predict_file")
				            echo "模型训练日期: ${train_date}, 模型预测日期: ${predict_date}, AUC: ${auc}, 模型路径: ${model_file}"
				        else
				            # Never score a stale or partial prediction file.
				            printf 'ERROR: fm_predict failed (model date %s, data date %s)\n' \
				                "$train_date" "$predict_date" >&2
				            status=1
				        fi

				        predict_date=$(date -d "$predict_date +1 day" +%Y%m%d) || return 1
				    done

				    printf '\n\t==================== 预测 %s 模型结束 ====================\n' "$train_date"

				    return "$status"
				}
 | 
	
		
			
				# Train one model per day, each chained onto the previous day's model,
				# and evaluate every model right after it is trained.
				#
				# For every date from $train_date (initially $begin_date) up to, but not
				# including, $end_date: stream that day's HDFS data into fm_train, writing
				# ${MODEL_PATH}/${model_name}_<date>.txt.  On days after $begin_date the
				# previous day's model file is passed via -im (incremental training).
				# predict_auc is then called for the freshly trained model.
				#
				# Globals read:    begin_date end_date model_name dim HADOOP FM_TRAIN
				#                  HDFS_TRAIN_DATE_PATH MODEL_PATH
				# Globals written: train_date
				# Returns: 0 on success; 2 on missing or malformed arguments; 1 if
				#          training or date arithmetic fails or any evaluation fails
				main() {
				    local model_file yesterday day status

				    if [ -z "$begin_date" ] || [ -z "$end_date" ] \
				        || [ -z "$model_name" ] || [ -z "$dim" ]; then
				        printf 'Usage: %s BEGIN_DATE END_DATE MODEL_NAME DIM  (dates as YYYYMMDD)\n' \
				            "${0##*/}" >&2
				        return 2
				    fi

				    # Dates are compared numerically and embedded in paths, so only the
				    # exact YYYYMMDD form is accepted.
				    for day in "$begin_date" "$end_date"; do
				        case $day in
				            [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]) ;;
				            *)
				                printf 'ERROR: invalid date "%s", expected YYYYMMDD\n' "$day" >&2
				                return 2
				                ;;
				        esac
				    done

				    if [ "$begin_date" -gt "$end_date" ]; then
				        printf 'ERROR: begin_date %s is after end_date %s\n' \
				            "$begin_date" "$end_date" >&2
				        return 2
				    fi

				    train_date=${train_date:-$begin_date}
				    status=0

				    # Numeric -lt instead of != so the loop cannot run past end_date.
				    while [ "$train_date" -lt "$end_date" ]; do
				        printf '\n\n\n\n\n\n\n'
				        echo "==================== 开始训练 $train_date 模型 ===================="

				        model_file="${MODEL_PATH}/${model_name}_${train_date}.txt"

				        # Build the fm_train argument list once; only later days add -im.
				        set -- -m "$model_file" -dim "$dim" -core 8
				        if [ "$train_date" != "$begin_date" ]; then
				            yesterday=$(date -d "$train_date -1 day" +%Y%m%d) || return 1
				            set -- "$@" -im "${MODEL_PATH}/${model_name}_${yesterday}.txt"
				        fi

				        # The glob is quoted so that hadoop, not the local shell, expands it.
				        if ! "$HADOOP" fs -text "${HDFS_TRAIN_DATE_PATH}/${train_date}/*" \
				            | "$FM_TRAIN" "$@"; then
				            # The next day would need this model as its -im input.
				            printf 'ERROR: fm_train failed for %s\n' "$train_date" >&2
				            return 1
				        fi

				        # A failed evaluation is recorded but does not stop training.
				        predict_auc || status=1

				        # Report completion before advancing so the banner names the
				        # day that was actually trained.
				        echo "==================== 训练 $train_date 模型结束 ===================="

				        train_date=$(date -d "$train_date +1 day" +%Y%m%d) || return 1
				    done

				    return "$status"
				}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				# Entry point; the script's exit status is main's return status.
				main "$@"
 |