zhangbo 1 vuosi sitten
vanhempi
commit
6b56c959f6

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_04_sampleStatic.scala → src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_05_sampleStatic.scala

@@ -11,7 +11,7 @@ import java.util
 import scala.collection.JavaConversions._
 
 
-object makedata_04_sampleStatic {
+object makedata_05_sampleStatic {
   def main(args: Array[String]) {
     val spark = SparkSession
       .builder()

+ 6 - 0
zhangbo/04_upload.sh

@@ -6,3 +6,9 @@ dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_sharev2_202
 
 cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt
 dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20231220_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/
+
+
+cat /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20240106.txt | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' > /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20240106_change.txt
+dfs -put /root/zhangbo/recommend-emr-dataprocess/zhangbo/model/model_ros_v2_20240106_change.txt oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/model_ros_v2_20231220_change.txt
+
+

+ 16 - 15
zhangbo/05_update_everyday_ros.sh

@@ -5,9 +5,10 @@ set -x
 savePath=/dw/recommend/model/ros_sample_v2/
 model_name=model_ros_v2
 today="$(date +%Y%m%d)"
+today_early_1="$(date -d '1 days ago' +%Y%m%d)"
 #yesterday="$(date -d '1 days ago' +%Y%m%d)"
 yesterday=20231227
-
+jar_main=makedata_04_rosHdfsFromTablev2  # literal Spark main-class name, appended to the package in --class below
 
 HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
 FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
@@ -15,16 +16,16 @@ MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
 OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
 
 # 1 生产数据
-#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
-#--class com.aliyun.odps.spark.examples.makedata.makedata_01_readtable2hdfs \
-#--master yarn --driver-memory 1G --executor-memory 4G --executor-cores 2 --num-executors 16 \
-#/root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-#tablePart:32 savePath:${savePath} beginStr:${yesterday} endStr:${yesterday}
-#
-#if [ $? -eq 1 ]; then
-#    echo "Spark任务执行失败"
-#    exit 1
-#fi
+/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.${jar_main} \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+/root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:32 savePath:${savePath} beginStr:${today_early_1} endStr:${today_early_1}
+
+if [ $? -eq 1 ]; then
+    echo "Spark任务执行失败"
+    exit 1
+fi
 
 # 2 加载上次模型 训练本轮数据 保存本轮模型
 end_date=${today}
@@ -43,13 +44,13 @@ while [[ "$loop_date" != "$end_date" ]]; do
 done
 
 # 3 本轮模型格式转换
-cat ${MODEL_PATH}/${model_name}_${yesterday}.txt \
+cat ${MODEL_PATH}/${model_name}_${today_early_1}.txt \
 | sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' \
-> ${MODEL_PATH}/${model_name}_${yesterday}_change.txt
+> ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt
 
 # 4 转换后模型上传oss
 $HADOOP fs -rm -r oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/${model_name}_change.txt
-$HADOOP fs -put ${MODEL_PATH}/${model_name}_${yesterday}_change.txt ${OSS_PATH}/${model_name}_change.txt
+$HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${OSS_PATH}/${model_name}_change.txt
 
 
-#nohup sh 05_update_everyday_ros.sh > p.log 2>&1 &
+#nohup sh 05_update_everyday_ros.sh > p.log 2>&1 &

+ 56 - 42
zhangbo/05_update_everyday_str.sh

@@ -1,42 +1,56 @@
-##!/bin/sh
-## 0 全局变量/参数
-#savePath=/dw/recommend/model/share_ratio_samples_v2/
-#model_name=model_sharev2
-#today="$(date -d '1 days ago' +%Y%m%d)"
-#yesterday="$(date -d '2 days ago' +%Y%m%d)"
-#yesterday_early=20231222
-##yesterday_early="$(date -d '3 days ago' +%Y%m%d)"
-#
-#
-#HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
-#FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
-#MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
-#OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
-#
-## 1 生产数据
-#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
-#--class com.aliyun.odps.spark.examples.makedata.makedata_01_readtable2hdfs \
-#--master yarn --driver-memory 1G --executor-memory 4G --executor-cores 2 --num-executors 16 \
-#./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-#tablePart:32 savePath:${savePath} beginStr:${yesterday} endStr:${yesterday}
-#
-## 2 加载上次模型 训练本轮数据 保存本轮模型
-#end_date=${today}
-#loop_date=${yesterday_early}
-#while [[ "$loop_date" != "$end_date" ]]; do
-#    echo -------train ${loop_date}----------
-#    loop_date_model=$(date -d "$loop_date - 1 day" +%Y%m%d)
-#    $HADOOP fs -text ${savePath}/dt=${loop_date}/* | ${FM_TRAIN} -m ${MODEL_PATH}/${model_name}_${loop_date}.txt \
-#-dim 1,1,0 -core 8 -im ${MODEL_PATH}/${model_name}_${loop_date_model}.txt
-#    echo -------save ${MODEL_PATH}/${model_name}_${loop_date}.txt----------
-#    loop_date=$(date -d "$loop_date + 1 day" +%Y%m%d)
-#done
-#
-## 3 本轮模型格式转换
-#cat ${MODEL_PATH}/${model_name}_${yesterday}.txt \
-#| sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' \
-#> ${MODEL_PATH}/${model_name}_${yesterday}_change.txt
-#
-## 4 转换后模型上传oss
-#$HADOOP fs -rm -r oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model/${model_name}_change.txt
-#$HADOOP fs -put ${MODEL_PATH}/${model_name}_${yesterday}_change.txt ${OSS_PATH}/${model_name}_change.txt
+#!/bin/bash
+# Daily update of the model_sharev2 FM model: dump samples, train day by day, convert, upload to OSS.
+set -eo pipefail  # bash-only ([[ ]] is used below); a failing pipeline stage aborts the run
+set -x
+# 0 Global variables / parameters
+savePath=/dw/recommend/model/share_ratio_samples_v2/
+model_name=model_sharev2
+today="$(date +%Y%m%d)"
+today_early_1="$(date -d '1 days ago' +%Y%m%d)"
+#yesterday="$(date -d '1 days ago' +%Y%m%d)"
+yesterday=20231221  # first day trained by the loop in step 2; it loads the previous day's model via -im
+jar_main=makedata_01_readtable2hdfs
+
+HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
+FM_TRAIN="/root/sunmingze/alphaFM/bin/fm_train"
+MODEL_PATH="/root/zhangbo/recommend-emr-dataprocess/zhangbo/model/"
+OSS_PATH="oss://art-recommend.oss-cn-hangzhou.aliyuncs.com/video_str_model"
+
+# 1 Produce the sample data for the previous day
+# The command is tested by `if !`: under `set -e` a separate `$?` check would never run on failure.
+if ! /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+--class com.aliyun.odps.spark.examples.makedata.${jar_main} \
+--master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
+/root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+tablePart:32 savePath:${savePath} beginStr:${today_early_1} endStr:${today_early_1}; then
+    echo "Spark任务执行失败"
+    exit 1
+fi
+
+# 2 Load the previous model, train on each day's data, save that day's model
+end_date=${today}
+loop_date=${yesterday}
+while [[ "$loop_date" != "$end_date" ]]; do
+    echo -------train ${loop_date}----------
+    loop_date_model=$(date -d "$loop_date - 1 day" +%Y%m%d)
+    # With pipefail, a failure of either the HDFS read or fm_train takes the failure branch.
+    if ! $HADOOP fs -text ${savePath}/dt=${loop_date}/* | ${FM_TRAIN} -m ${MODEL_PATH}/${model_name}_${loop_date}.txt \
+-dim 1,1,0 -core 8 -im ${MODEL_PATH}/${model_name}_${loop_date_model}.txt; then
+        echo "训练失败"
+        exit 1
+    fi
+    echo -------save ${MODEL_PATH}/${model_name}_${loop_date}.txt----------
+    loop_date=$(date -d "$loop_date + 1 day" +%Y%m%d)
+done
+
+# 3 Convert this round's model: drop line 1, keep fields 1 and 2 (tab-separated) where field 2 is not "0"
+cat ${MODEL_PATH}/${model_name}_${today_early_1}.txt \
+| sed '1d' | awk -F " " '{if($2!="0") print $1"\t"$2}' \
+> ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt
+
+# 4 Upload the converted model to OSS (-f: a missing previous copy must not abort the run under set -e)
+$HADOOP fs -rm -r -f ${OSS_PATH}/${model_name}_change.txt
+$HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${OSS_PATH}/${model_name}_change.txt
+
+
+#nohup bash 05_update_everyday_str.sh > p.log 2>&1 &