zhangbo 1 year ago
parent
commit
661da3da59

+ 2 - 2
zhangbo/01_train.sh

@@ -5,7 +5,7 @@ HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
 $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_train -m model/${model_name}_${day}.txt -dim 1,1,0 -core 8
 
 
-# nohup sh 01_train.sh 20231214 /dw/recommend/model/ros_sample/ model_sharev2 >p1.log 2>&1 &
-# nohup sh 01_train.sh 20231220 /dw/recommend/model/ros_sample/ model_ros_v1 >p_model_ros_v1.log 2>&1 &
+# nohup sh 01_train.sh 20231214 /dw/recommend/model/share_ratio_samples_v2/ model_sharev2 >p1.log 2>&1 &
+# nohup sh 01_train.sh 20231220 /dw/recommend/model/ros_sample_v2/ model_ros_v2 >p_model_ros_v2.log 2>&1 &
 # str 模型路径:/dw/recommend/model/share_ratio_samples_v2
 # ros 模型路径:/dw/recommend/model/ros_sample/

+ 4 - 1
zhangbo/03_predict.sh

@@ -7,6 +7,9 @@ $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_predic
 cat predict/${output_file}_$day.txt | /root/sunmingze/AUC/AUC
 
 # nohup sh 03_predict.sh 20231221 model_sharev2_20231220.txt >p3_2.log 2>&1 &
+# str:
 
 # ros:
-# nohup sh 03_predict.sh 20231221 /dw/recommend/model/ros_sample/ model_ros_v1_20231220.txt model_ros_v1 >p_pred.log 2>&1 &
+# nohup sh 03_predict.sh 20231221 /dw/recommend/model/ros_sample/ model_ros_v1_20231220.txt model_ros_v1 >p_pred.log 2>&1 &
+# nohup sh 03_predict.sh 20231221 /dw/recommend/model/ros_sample_v2/ model_ros_v2_20231220.txt model_ros_v2 >p_pred.log 2>&1 &
+# nohup sh 03_predict.sh 20231221 /dw/recommend/model/ros_sample/ model_ros_v2_20231220.txt model_ros_v2 >p_pred.log 2>&1 &

+ 4 - 1
zhangbo/04_upload.sh

@@ -1,5 +1,8 @@
 
 cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220_change.txt
+dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/
 
 
-dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/
+
+cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt
+dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/

+ 41 - 0
zhangbo/05_update_everyday_ros.sh

@@ -0,0 +1,41 @@
+#!/bin/sh
+# 0 全局变量/参数
+savePath=/dw/recommend/model/ros_sample_v2/
+model_name=model_ros_v2
+today="$(date -d +%Y%m%d)"
+#yesterday="$(date -d '1 days ago' +%Y%m%d)"
+yesterday=20231221
+
+
+HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
+FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
+MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
+OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
+
+# 1 生产数据
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.makedata_01_readtable2hdfs \
+--master yarn --driver-memory 1G --executor-memory 4G --executor-cores 2 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:32 savePath:${savePath} beginStr:${yesterday} endStr:${yesterday}
+
+# 2 加载上次模型 训练本轮数据 保存本轮模型
+end_date=${today}
+loop_date=${yesterday}
+while [[ "$loop_date" != "$end_date" ]]; do
+    echo -------train ${loop_date}----------
+    loop_date_model=$(date -d "$loop_date - 1 day" +%Y%m%d)
+    $HADOOP fs -text ${savePath}/dt=${loop_date}/* | ${FM_TRAIN} -m ${MODEL_PATH}/${model_name}_${loop_date}.txt \
+-dim 1,1,0 -core 8 -im ${MODEL_PATH}/${model_name}_${loop_date_model}.txt
+    echo -------save ${MODEL_PATH}/${model_name}_${loop_date}.txt----------
+    loop_date=$(date -d "$loop_date + 1 day" +%Y%m%d)
+done
+
+# 3 本轮模型格式转换
+cat ${MODEL_PATH}/${model_name}_${yesterday}.txt \
+| sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' \
+> ${MODEL_PATH}/${model_name}_${yesterday}_change.txt
+
+# 4 转换后模型上传oss
+$HADOOP fs -rm -r oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/${model_name}_change.txt
+$HADOOP fs -put ${MODEL_PATH}/${model_name}_${yesterday}_change.txt ${OSS_PATH}/${model_name}_change.txt

+ 42 - 0
zhangbo/05_update_everyday_str.sh

@@ -0,0 +1,42 @@
+#!/bin/sh
+# 0 全局变量/参数
+savePath=/dw/recommend/model/share_ratio_samples_v2/
+model_name=model_sharev2
+today="$(date -d '1 days ago' +%Y%m%d)"
+yesterday="$(date -d '2 days ago' +%Y%m%d)"
+yesterday_early=20231222
+#yesterday_early="$(date -d '3 days ago' +%Y%m%d)"
+
+
+HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
+FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
+MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
+OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
+
+# 1 生产数据
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.makedata_01_readtable2hdfs \
+--master yarn --driver-memory 1G --executor-memory 4G --executor-cores 2 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:32 savePath:${savePath} beginStr:${yesterday} endStr:${yesterday}
+
+# 2 加载上次模型 训练本轮数据 保存本轮模型
+end_date=${today}
+loop_date=${yesterday_early}
+while [[ "$loop_date" != "$end_date" ]]; do
+    echo -------train ${loop_date}----------
+    loop_date_model=$(date -d "$loop_date - 1 day" +%Y%m%d)
+    $HADOOP fs -text ${savePath}/dt=${loop_date}/* | ${FM_TRAIN} -m ${MODEL_PATH}/${model_name}_${loop_date}.txt \
+-dim 1,1,0 -core 8 -im ${MODEL_PATH}/${model_name}_${loop_date_model}.txt
+    echo -------save ${MODEL_PATH}/${model_name}_${loop_date}.txt----------
+    loop_date=$(date -d "$loop_date + 1 day" +%Y%m%d)
+done
+
+# 3 本轮模型格式转换
+cat ${MODEL_PATH}/${model_name}_${yesterday}.txt \
+| sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' \
+> ${MODEL_PATH}/${model_name}_${yesterday}_change.txt
+
+# 4 转换后模型上传oss
+$HADOOP fs -rm -r oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/${model_name}_change.txt
+$HADOOP fs -put ${MODEL_PATH}/${model_name}_${yesterday}_change.txt ${OSS_PATH}/${model_name}_change.txt