zhaohaipeng пре 9 месеци
родитељ
комит
2bf64b2dbf
1 измењених фајлова са 24 додато и 2 уклоњено
  1. 24 2
      zhangbo/07_ad_model_update_everyday.sh

+ 24 - 2
zhangbo/07_ad_model_update_everyday.sh

@@ -4,7 +4,7 @@ set -ex
 # 0 全局变量/参数
 originDataSavePath=/dw/recommend/model/31_ad_sample_data_auto/
 bucketFeatureSavePath=/dw/recommend/model/33_ad_train_data_nosparse_auto/
-model_name=ad_model_lr
+model_name=model_lr0
 today="$(date +%Y%m%d)"
 today_early_1="$(date -d '1 days ago' +%Y%m%d)"
 beginTime=08
@@ -12,6 +12,10 @@ endTime=23
 beginStr=${today_early_1}${beginTime}
 endStr=${today_early_1}${endTime}
 
+# Local directory of trained model files; the format-conversion step reads
+# from and writes to it, and the OSS upload takes its source file from it.
+MODEL_PATH=/root/zhaohp/recommend-emr-dataprocess/model
+# Absolute path of the hadoop CLI invoked for `fs -text`, `-test`, `-rm`, `-put`.
+HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
+# alphaFM installation root; the fm_train binary used for training is under bin/.
+FM_HOME=/root/sunmingze/alphaFM
+# OSS destination prefix for the published model (note the trailing slash).
+OSS_PATH=oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/ad_model/
 
 # 1 判断依赖的数据表是否生产完成
 source /root/anaconda3/bin/activate py37
@@ -62,7 +66,7 @@ filterNames:XXXXX \
 bucketFileName:20240620_ad_bucket_249_fix.txt \
 readPath:${originDataSavePath} \
 savePath:${bucketFeatureSavePath}
-if [ $? -ne 0 ]: then
+if [ $? -ne 0 ]; then
    echo "Spark特征分桶处理任务执行失败"
    exit 1
 else
@@ -71,9 +75,27 @@ fi
 
 
 # 4 模型训练
+# Step 4: stream yesterday's bucketed samples through alphaFM's fm_train.
+# The pipeline is tested directly by `if`, so the failure branch still runs
+# when errexit (`set -e`) is active; with a separate `[ $? -ne 0 ]` check the
+# shell would exit on the failing pipeline before ever printing the message.
+# The model is written under ${MODEL_PATH}, the directory the format-conversion
+# step reads it from, rather than a model/ directory relative to the cwd.
+# NOTE(review): only fm_train's exit status is checked; a failing
+# `hadoop fs -text` goes unnoticed unless pipefail is enabled - confirm.
+if ! "${HADOOP}" fs -text "${bucketFeatureSavePath}/${today_early_1}/*" \
+    | "${FM_HOME}/bin/fm_train" -m "${MODEL_PATH}/${model_name}_${today_early_1}.txt" -dim 1,1,0 -core 8; then
+   echo "模型训练失败"
+   exit 1
+fi
+
 
 # 5 对比AUC
 
 # 6 模型格式转换
+# Step 6: convert the trained model into the two-column upload format:
+# drop the first line, then emit "<field1>\t<field2>" for every row whose
+# second field is not the string "0".
+# awk reads the model file itself (no cat | sed in front of it), so a missing
+# or unreadable model makes this command fail instead of silently producing
+# an empty *_change.txt that the upload step would then publish.
+awk -F " " 'NR > 1 && $2 != "0" { print $1 "\t" $2 }' \
+    "${MODEL_PATH}/${model_name}_${today_early_1}.txt" \
+    > "${MODEL_PATH}/${model_name}_${today_early_1}_change.txt"
 
 # 7 模型文件上传OSS
+# Step 7: publish the converted model to OSS under a fixed file name,
+# removing any previously published copy first.
+# `fs -test -e` is used as the `if` condition on purpose: it exits non-zero
+# when the object is absent, and run as a standalone command under `set -e`
+# that would abort the script before the else branch and the upload below.
+online_model_path=${OSS_PATH}/${model_name}.txt
+if "${HADOOP}" fs -test -e "${online_model_path}"; then
+    echo "数据存在, 先删除。"
+    "${HADOOP}" fs -rm -r "${online_model_path}"
+else
+    echo "数据不存在"
+fi
+"${HADOOP}" fs -put "${MODEL_PATH}/${model_name}_${today_early_1}_change.txt" "${online_model_path}"