#!/usr/bin/env bash
# Compute the average predicted score for each VID.
#
# Positional arguments:
#   $1  predict_date  date tag; the evaluation data is read from
#                     ${TXT_PATH}/<predict_date>.txt and the tag is embedded
#                     in every prediction output file name
#   $2  model_name    base name of the model file, ${MODEL_PATH}/<model_name>.txt
#   $3  predict_dim   value handed to fm_predict through its -dim option
#
# Bash is required (not plain sh): the script relies on arrays and on
# Bash-only loop/test syntax.

# Trace every executed command; useful because the script is normally run
# unattended with its output redirected to a log file.
set -x

predict_date="$1"
model_name="$2"
predict_dim="$3"

# NOTE: PROJECT_HOME ends with a slash, so the derived paths contain "//".
# That is harmless for the filesystem and is kept so printed paths do not change.
PROJECT_HOME=/root/zhaohp/recommend-emr-dataprocess/
# Directory holding the model files.
MODEL_PATH=${PROJECT_HOME}/model/20240805/
# Directory that receives one fm_predict output file per VID.
PREDICT_PATH=${PROJECT_HOME}/predict/recommend/
# Directory holding the tab-separated evaluation data files.
TXT_PATH=/mnt/disk1/20240729
# Hadoop client binary (not referenced by the code visible in this script).
HADOOP=/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop
# alphaFM prediction binary.
FM_PREDICT=/root/sunmingze/alphaFM/bin/fm_predict

# VIDs to evaluate; each one is scored separately.
vids=(
  22895200 22751457 14146727 22847440 22927926 22858609 22974689
  22563167 22959023 22970515 22946931 22994781 20720060 22979110
)
#######################################
# Print, for every VID, the mean of the rescaled prediction scores.
#
# Each line of the per-VID prediction file contributes
#     new_score = (0.1 * score) / (1 - 0.9 * score)
# where score is the second whitespace-separated column.
# NOTE(review): the formula presumably maps scores from a model trained on
# 10%-down-sampled negatives back to the original scale - confirm.
#
# Globals (read): vids, PREDICT_PATH, model_name, predict_date
# Arguments:      none
# Outputs:        one two-line report per VID on stdout; "NaN" is reported
#                 when the prediction file is empty or cannot be read
#                 (a warning goes to stderr in the latter case)
#######################################
restore_score() {
  # Keep loop state local so callers' variables are not clobbered.
  local vid score_avg score_file

  for vid in "${vids[@]}"; do
    score_file="${PREDICT_PATH}/${model_name}_${predict_date}_${vid}.txt"

    if [[ -r "${score_file}" ]]; then
      score_avg=$(awk '
        {
          score = $2
          sum += (0.1 * score) / (1 - 0.9 * score)
          count++
        }
        END {
          if (count > 0) {
            print sum / count
          } else {
            print "NaN"
          }
        }' "${score_file}")
    else
      # awk implementations disagree on whether a missing input file is
      # fatal, so handle it explicitly and report a well-defined value.
      printf 'restore_score: cannot read %s\n' "${score_file}" >&2
      score_avg="NaN"
    fi

    printf 'VID: %s 平均分计算结果: %s \n\t数据路径: %s\n' \
      "${vid}" "${score_avg}" "${score_file}"
  done
}
#######################################
# Score every VID with fm_predict and print the mean raw score per VID.
#
# For each VID the evaluation file ${TXT_PATH}/<predict_date>.txt is filtered
# to the rows whose second tab-separated column equals the VID; that column
# is removed and the remaining columns are piped to fm_predict, which writes
# ${PREDICT_PATH}/<model_name>_<predict_date>_<vid>.txt. The mean of the
# second whitespace-separated column of that file is then reported.
#
# Globals (read): vids, TXT_PATH, MODEL_PATH, PREDICT_PATH, FM_PREDICT,
#                 predict_date, model_name, predict_dim
# Arguments:      none
# Outputs:        one three-line report per VID on stdout ("NaN" when the
#                 prediction file has no lines)
# Returns:        2 when predict_date, model_name or predict_dim is empty
#######################################
main() {
  # Keep loop state local so callers' variables are not clobbered.
  local vid score_avg input_file model_file output_file

  # Without these the paths below would silently degrade to things like
  # "<TXT_PATH>/.txt", so refuse to run instead.
  if [[ -z "${predict_date:-}" || -z "${model_name:-}" || -z "${predict_dim:-}" ]]; then
    printf 'usage: %s <predict_date> <model_name> <predict_dim>\n' "${0##*/}" >&2
    return 2
  fi

  input_file="${TXT_PATH}/${predict_date}.txt"
  model_file="${MODEL_PATH}/${model_name}.txt"

  for vid in "${vids[@]}"; do
    output_file="${PREDICT_PATH}/${model_name}_${predict_date}_${vid}.txt"

    # Re-join all columns except column 2 (the VID). A separate separator
    # variable is used so an empty first column is preserved rather than
    # swallowed, which would shift every following column.
    awk -v vid="${vid}" -F'\t' '
      $2 == vid {
        line = ""
        sep = ""
        for (i = 1; i <= NF; i++) {
          if (i == 2) {
            continue
          }
          line = line sep $i
          sep = "\t"
        }
        print line
      }' "${input_file}" \
      | "${FM_PREDICT}" -m "${model_file}" -dim "${predict_dim}" -core 8 \
        -out "${output_file}"

    score_avg=$(awk '
      { sum += $2; count++ }
      END {
        if (count > 0) {
          print sum / count
        } else {
          print "NaN"
        }
      }' "${output_file}")

    printf 'VID: %s 平均分计算结果: %s \n\t模型路径: %s \n\t评估数据路径: %s\n' \
      "${vid}" "${score_avg}" "${model_file}" "${input_file}"
  done
}
# Entry point; the script's arguments are forwarded for completeness.
main "$@"

# Example invocation (background run, all output captured in a file):
# nohup ./recommend/20_vid_avg_score.sh 20240729 model_recommend_v3_sample_01_20240728 8 > logs/20_vid_model_recommend_v3_20240728.sh 2>&1 &
# NOTE(review): the redirect target above ends in ".sh"; presumably ".log" was intended - confirm.