zhangbo 1 年之前
父节点
当前提交
db4e3223db

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_07_rosData.scala

@@ -29,9 +29,9 @@ object makedata_07_rosData {
     val beginStr = param.getOrElse("beginStr", "20230101")
     val endStr = param.getOrElse("endStr", "20230101")
     val readPath = param.getOrElse("readPath", "/dw/recommend/model/00_sample_data/")
-    val savePath = param.getOrElse("savePath", "/dw/recommend/model/01_str_data/")
+    val savePath = param.getOrElse("savePath", "/dw/recommend/model/04_ros_data/")
     val featureVersion =  param.getOrElse("featureVersion", "v2")
-    val ifRepart = param.getOrElse("ifRepart", "100").toInt
+    val ifRepart = param.getOrElse("ifRepart", "10").toInt
     val labelVersion = param.getOrElse("labelVersion", "v1")
 
 

+ 3 - 1
zhangbo/01_train.sh

@@ -22,9 +22,11 @@ $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_train
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/01_str_data model_str_big >p1.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/01_str_data model_str_big1 0.5 1.0 >p1_train.log 2>&1 &
 
+
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small >p2.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small1 0.1 5.0 >p2_train.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/03_str_data model_str_mid >p3.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/04_str_data model_str_mid2 >p4_train.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/04_str_data model_str_mid4 0.1 5.0 >p4_train.log 2>&1 &
-# nohup sh 01_train.sh 20240114 /dw/recommend/model/05_str_data model_str_mid3 >p5_train.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/05_str_data model_str_mid3 >p5_train.log 2>&1 &
+# nohup sh 01_train.sh 20240111 /dw/recommend/model/04_str_data model_str_mid 0.1 5.0 >p1_train.log 2>&1 &

+ 1 - 1
zhangbo/02_train_go.sh

@@ -21,4 +21,4 @@ while [[ "$current_date" != "$end_date" ]]; do
     current_date=$(date -d "$current_date + 1 day" +%Y%m%d)
 done
 
-# nohup sh 02_train_go.sh 20240111 20240117 model_str_v4 >p_model_str_v4.log 2>&1 &
+# nohup sh 02_train_go.sh 20240112 20240117 model_str_mid >p2.log 2>&1 &

+ 1 - 1
zhangbo/03_predict.sh

@@ -15,7 +15,7 @@ cat predict/${output_file}_$day.txt | /root/sunmingze/AUC/AUC
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/01_str_data/ model_str_big1_20240114.txt model_str_big1 >p1_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/02_str_data/ model_str_small_20240114.txt model_str_small >p2_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/02_str_data/ model_str_small1_20240114.txt model_str_small1 >p2_pred.log 2>&1 &
-# nohup sh 03_predict.sh 20240115 /dw/recommend/model/03_str_data/ model_str_mid_20240114.txt model_str_mid >p3_pred.log 2>&1 &
+# nohup sh 03_predict.sh 20240117 /dw/recommend/model/04_str_data/ model_str_mid_20240116.txt model_str_mid >p3_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/04_str_data/ model_str_mid2_20240114.txt model_str_mid2 >p4_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/04_str_data/ model_str_mid4_20240114.txt model_str_mid4 >p4_pred.log 2>&1 &
 # nohup sh 03_predict.sh 20240115 /dw/recommend/model/05_str_data/ model_str_mid3_20240114.txt model_str_mid3 >p5_pred.log 2>&1 &

+ 3 - 0
zhangbo/04_upload.sh

@@ -16,3 +16,6 @@ cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20240106.
 dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20240106_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/model_ros_v2_20231220_change.txt
 
 
+cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_str_mid_20240112.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_str_mid_20240112_change.txt
+dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_str_mid_20240112_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/model_str_mid.txt
+

+ 34 - 13
zhangbo/05_update_everyday_str.sh

@@ -1,32 +1,46 @@
 #!/bin/sh
-set -e
-set -x
+set -ex
 # 0 全局变量/参数
-savePath=/dw/recommend/model/share_ratio_samples_v2/
-model_name=model_sharev2
+savePath=/dw/recommend/model/04_str_data/
+model_name=model_str_mid
 today="$(date +%Y%m%d)"
 today_early_1="$(date -d '1 days ago' +%Y%m%d)"
-#yesterday="$(date -d '1 days ago' +%Y%m%d)"
-yesterday=20231221
-jar_main=makedata_01_readtable2hdfs
+yesterday="$(date -d '1 days ago' +%Y%m%d)"
 
 HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
 FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
 MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
-OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
+OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/"
 
 # 1 生产数据
 /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
---class com.aliyun.odps.spark.examples.makedata.${jar_main} \
---master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+--class com.aliyun.odps.spark.examples.makedata.makedata_06_originData \
+--name every_day_origindata_${model_name}_${today} \
+--master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 16 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:32 savePath:${savePath} beginStr:${today_early_1} endStr:${today_early_1}
+if [ $? -eq 1 ]; then
+    echo "Spark原始样本生产任务执行失败"
+    exit 1
+else
+    echo "spark原始样本生产执行成功"
+fi
 
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.makedata_07_strData \
+--name every_day_strdata_${model_name}_${today} \
+--master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 16 \
+./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+savePath:${savePath} beginStr:${today_early_1} endStr:${today_early_1} featureVersion:v4 ifRepart:100
 if [ $? -eq 1 ]; then
-    echo "Spark任务执行失败"
+    echo "Spark训练样本生产任务执行失败"
     exit 1
+else
+    echo "spark训练样本生产执行成功"
 fi
 
+
+
 # 2 加载上次模型 训练本轮数据 保存本轮模型
 end_date=${today}
 loop_date=${yesterday}
@@ -49,8 +63,15 @@ cat ${MODEL_PATH}/${model_name}_${today_early_1}.txt \
 > ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt
 
 # 4 转换后模型上传oss
-$HADOOP fs -rm -r oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/${model_name}_change.txt
-$HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${OSS_PATH}/${model_name}_change.txt
+online_model_path=${OSS_PATH}/${model_name}.txt
+# Run the existence test as the `if` condition: under `set -e` a bare failing `-test -e` (model not yet on OSS) would abort the script before the upload.
+if $HADOOP fs -test -e "${online_model_path}"; then
+    echo "数据存在, 先删除。"
+    $HADOOP fs -rm -r "${online_model_path}"
+else
+    echo "数据不存在"
+fi
+$HADOOP fs -put "${MODEL_PATH}/${model_name}_${today_early_1}_change.txt" "${online_model_path}"
 
 
 #nohup sh 05_update_everyday_str.sh > p.log 2>&1 &