@@ -0,0 +1,122 @@
+#!/bin/bash
+
+# Resolve all paths relative to this script's directory.
+abs_path=$(cd "$(dirname "$0")" && pwd)
+hdfs_url="hdfs://192.168.141.208:9000"
+hdfs_path="/dw/recommend/model/831_recsys_rov_train_data"
+hdfs_feature_path="/dw/recommend/model/831_recsys_analysis_data"
+data_path="${abs_path}/../data"
+model_path="${abs_path}/../model"
+monitor_py="${abs_path}/../src/tools/rec_monitor_push.py"
+model_name="推荐模型str+_v1"  # display name ("recommendation model str+ v1") used by the monitor/update scripts
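+
+# Guard: require the single literal argument "run" so the pipeline cannot
+# be kicked off by accident.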
+run_mode=""
+if (( $# == 1 ))
+then
+    run_mode=$1
+else
+    exit 1
+fi
+
+if [[ "$run_mode" != "run" ]]
+then
+    exit 1
+fi
+
+# 0. check data
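+# Wait for yesterday's training partition to land on HDFS, retrying up to
+# try_times times with wait_time between checks, then verify its row count.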
+try_times=10
+wait_time=300s
+data_size_threshold=25000
+feature_size_threshold=10000
+data_date=$(date +%Y%m%d -d "-1 day")
+before_data_date=$(date +%Y%m%d -d "-2 day")
+for ((i=0; i<=try_times; i++))
+do
+    # One fixed part file serves as a cheap readiness probe for the partition.
+    hdfs_file=$(printf "%s/%s/part-00063.gz" "$hdfs_path" "$data_date")
+    hdfs dfs -fs "$hdfs_url" -test -e "$hdfs_file"
+    if [ $? -ne 0 ]
+    then
+        if [ "$i" -ge "$try_times" ]
+        then
+            echo "$(date)" "$hdfs_file does not exist"
+            exit 1
+        else
+            echo "$(date)" "check $i, $hdfs_file does not exist, sleep $wait_time"
+            sleep "$wait_time"
+        fi
+    else
+        echo "$(date)" "$hdfs_file exists"
+        data_size=$(hdfs dfs -fs "$hdfs_url" -text "$hdfs_file" | wc -l | awk '{print $1}')
+        if [ "$data_size" -le "$data_size_threshold" ]
+        then
+            level=error
+            msg="${model_name} update failed, $hdfs_file rows $data_size <= $data_size_threshold"
+            #python3 $monitor_py --level "$level" --model "$model_name" --msg "$msg"
+            echo "$(date)" "$msg"
+            exit 1
+        fi
+        break
+    fi
+done
+
+# 0.1 download feature
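+# Pull the analysis/feature dump for data_date to local disk, filtering out
+# feature groups the trainer should not see (the grep -v pattern below).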
+hdfs_feature_file=$(printf "%s/%s/part-00000.gz" "$hdfs_feature_path" "$data_date")
+hdfs dfs -fs "$hdfs_url" -test -e "$hdfs_feature_file"
+if [ $? -ne 0 ]
+then
+    echo "$(date)" "$hdfs_feature_file does not exist"
+    exit 1
+else
+    local_feature_file="${data_path}/feature_${data_date}.csv"
+    hdfs dfs -fs "$hdfs_url" -text "$hdfs_feature_file" | grep -v -E '@wh@|@unknown|e1@|b8_6h|b8_12h|b9_6h|b9_12h|c9_.*cate|c9_lr1s' > "$local_feature_file"
+    sleep 30s
+    feature_size=$(wc -l "$local_feature_file" | awk '{print $1}')
+    if [ "$feature_size" -le "$feature_size_threshold" ]
+    then
+        echo "$(date)" "$local_feature_file rows $feature_size <= $feature_size_threshold"
+        exit 1
+    fi
+fi
+
+# 1. download data
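+# download_data.sh is assumed to fetch the train partitions for the two dates
+# passed in (before_data_date and data_date) into ${data_path}.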
+down_sh="${abs_path}/download_data.sh"
+echo "$(date)" "sh +x $down_sh $before_data_date $data_date"
+sh +x "$down_sh" "$before_data_date" "$data_date"
+sleep 30s
+
+# 2. train model
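+# Train on a 14-day window ending at data_date; train_model_data.sh is assumed
+# to take start date, end date, output model path, and the local feature file.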
+train_sh="${abs_path}/train_model_data.sh"
+sub_model_path="${model_path}/${data_date}"
+if [ ! -d "$sub_model_path" ]
+then
+    mkdir -p "$sub_model_path"
+fi
+train_start_date=$(date -d "$data_date -13 day" +"%Y%m%d")
+train_end_date=$data_date
+save_model_file="${sub_model_path}/model_fm_for_recsys_v1_str.txt"
+echo "$(date)" "sh +x $train_sh $train_start_date $train_end_date $save_model_file $local_feature_file"
+sh +x "$train_sh" "$train_start_date" "$train_end_date" "$save_model_file" "$local_feature_file"
+sleep 30s
+
+# 3. update model
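+# update_model.sh is assumed to push the new model file to serving under model_name.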
+update_sh="${abs_path}/update_model.sh"
+echo "$(date)" "sh +x $update_sh $save_model_file $model_name"
+sh +x "$update_sh" "$save_model_file" "$model_name"
+sleep 30s
+
+# 4. remove data
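+# remove_data.sh is assumed to clean up the downloaded training data under ${data_path}.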
+remove_sh="${abs_path}/remove_data.sh"
+sh +x "$remove_sh"