#!/bin/sh
# 21_ad_model_add_dt_train_predict_auc.sh
#
# Starting from an existing base model, incrementally train the model one day
# at a time, run prediction with each newly trained model, and compute the AUC.
#
# Usage:
#   21_ad_model_add_dt_train_predict_auc.sh \
#     begin_date end_date model_name train_dim predict_dim

# Trace every command as it is executed.
set -x

# Positional arguments.
begin_date=$1   # first date to train on
end_date=$2     # training stops when this date is reached; prediction runs through it
model_name=$3   # prefix of the model and prediction file names
train_dim=$4    # value handed to fm_train via -dim
predict_dim=$5  # value handed to fm_predict via -dim

# Locations of the project, the tools and the data.
PROJECT_HOME=/root/zhaohp/recommend-emr-dataprocess
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
HDFS_TRAIN_DATE_PATH=/dw/recommend/model/33_ad_train_data_v4
MODEL_PATH=${PROJECT_HOME}/model
PREDICT_PATH=${PROJECT_HOME}/predict
FM_TRAIN=/root/sunmingze/alphaFM/bin/fm_train
FM_PREDICT=/root/sunmingze/alphaFM/bin/fm_predict

# Date currently being trained; advanced one day per iteration by main.
train_date=$begin_date
  # Compute the AUC of the model trained on $train_date, for every date from the
  # day after $train_date through $end_date (inclusive).
  #
  # Globals read:    train_date, end_date, model_name, predict_dim, HADOOP,
  #                  FM_PREDICT, HDFS_TRAIN_DATE_PATH, MODEL_PATH, PREDICT_PATH,
  #                  AUC_BIN (optional; defaults to /root/sunmingze/AUC/AUC)
  # Globals written: predict_date, auc, auc_last_date, auc_model_file,
  #                  auc_output_file (plain sh has no portable `local`)
  # Outputs:         one summary line per predicted date on stdout, framed by a
  #                  start and an end banner; warnings go to stderr
  # Returns:         1 if a date cannot be computed, 0 otherwise
  predict_auc() {
    # printf instead of `echo -e`: under #!/bin/sh the -e flag is not portable.
    printf '\t==================== 开始预测 %s 模型 ====================\n' "$train_date"

    predict_date=$(date -d "$train_date +1 day" +%Y%m%d) || return 1
    auc_last_date=$(date -d "$end_date" +%Y%m%d) || return 1
    auc_model_file=${MODEL_PATH}/${model_name}_${train_date}.txt

    # Numeric comparison so the loop also ends when $train_date is already at
    # or past $end_date; a `!=` test would step forward forever in that case.
    while [ "$predict_date" -le "$auc_last_date" ]; do
      auc_output_file=${PREDICT_PATH}/${model_name}_${train_date}_${predict_date}.txt

      # The glob is quoted so the local shell never expands it; the pattern is
      # handed to `hadoop fs` unchanged.
      if "$HADOOP" fs -text "${HDFS_TRAIN_DATE_PATH}/${predict_date}/*" \
        | "$FM_PREDICT" -m "$auc_model_file" -dim "$predict_dim" -core 8 -out "$auc_output_file"; then
        # The AUC tool reads the prediction file on stdin; its stdout is the score.
        auc=$("${AUC_BIN:-/root/sunmingze/AUC/AUC}" < "$auc_output_file")
        echo "模型训练日期: ${train_date}, 模型预测日期: ${predict_date}, AUC: ${auc}, 模型路径: ${auc_model_file}"
      else
        # Do not score a missing or stale prediction file; skip this date.
        echo "预测失败, 跳过AUC计算: 模型训练日期: ${train_date}, 模型预测日期: ${predict_date}" >&2
      fi

      predict_date=$(date -d "$predict_date +1 day" +%Y%m%d) || return 1
    done

    printf '\n\t==================== 预测 %s 模型结束 ====================\n' "$train_date"
  }
  # Incrementally train one model per day, from $train_date up to (but not
  # including) $end_date, and evaluate each new model with predict_auc.
  #
  # Globals read:    end_date, model_name, train_dim, predict_dim, HADOOP,
  #                  FM_TRAIN, HDFS_TRAIN_DATE_PATH, MODEL_PATH
  # Globals written: train_date, yesterday, input_model, output_model,
  #                  train_key, stop_key
  # Exits:           1 when an argument is missing, a date cannot be parsed,
  #                  the previous day's model file is absent, or training fails
  main() {
    if [ -z "$train_date" ] || [ -z "$end_date" ] || [ -z "$model_name" ] \
      || [ -z "$train_dim" ] || [ -z "$predict_dim" ]; then
      echo "用法: $0 begin_date end_date model_name train_dim predict_dim" >&2
      exit 1
    fi

    # Normalised copies of the dates, used only for the loop condition.
    stop_key=$(date -d "$end_date" +%Y%m%d) || exit 1
    train_key=$(date -d "$train_date" +%Y%m%d) || exit 1

    # Incremental training. Numeric comparison so a start date later than the
    # end date ends the loop immediately instead of stepping forward forever.
    while [ "$train_key" -lt "$stop_key" ]; do
      echo "==================== 开始训练 $train_date 模型 ===================="

      # Each day's training continues from the model produced the day before.
      yesterday=$(date -d "$train_date -1 day" +%Y%m%d) || exit 1
      input_model=${MODEL_PATH}/${model_name}_${yesterday}.txt
      output_model=${MODEL_PATH}/${model_name}_${train_date}.txt
      if [ ! -e "${input_model}" ]; then
        echo "输入模型: ${input_model} 不存在,退出"
        exit 1
      fi

      # The glob is quoted so the local shell never expands it; the pattern is
      # handed to `hadoop fs` unchanged.
      if ! "$HADOOP" fs -text "${HDFS_TRAIN_DATE_PATH}/${train_date}/*" \
        | "$FM_TRAIN" -m "$output_model" -dim "$train_dim" -core 8 -im "$input_model"; then
        echo "模型训练失败: ${train_date}, 退出" >&2
        exit 1
      fi

      predict_auc

      # Log completion BEFORE advancing the date so the message names the day
      # that was actually trained.
      echo "==================== 训练 $train_date 模型结束 ===================="
      printf '\n\n\n\n\n\n\n'

      train_date=$(date -d "$train_date +1 day" +%Y%m%d) || exit 1
      train_key=$train_date
    done
  }
  # Entry point: run the incremental train / predict / AUC loop.
  main