commit 9abbad7c78 by zhangbo

+ 1 - 1
zhangbo/05_update_everyday_2model.sh

@@ -148,4 +148,4 @@ fi
 $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${online_model_path}
 
 
-#nohup sh 05_update_everyday_2model.sh > p05.log 2>&1 &
+# nohup sh 05_update_everyday_2model.sh > p05.log 2>&1 &
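
The corrected trailer documents how the script is meant to be launched. A typical invocation from an interactive shell, with a log follow for monitoring (the tail command is just a convenience, not part of the script):

    nohup sh 05_update_everyday_2model.sh > p05.log 2>&1 &   # detach; stdout and stderr both go to p05.log
    tail -f p05.log                                          # follow progress; Ctrl-C stops tail, not the job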

+ 35 - 9
zhangbo/06_update_everyday_feature.sh

@@ -3,7 +3,6 @@ set -ex
 # 0 Global variables / parameters
 today="$(date +%Y%m%d)"
 today_early_1="$(date -d '1 days ago' +%Y%m%d)"
-yesterday="$(date -d '1 days ago' +%Y%m%d)"
 
 HADOOP="/opt/apps/HADOOP-COMMON/hadoop-common-current/bin/hadoop"
 export SPARK_HOME=/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8
@@ -39,7 +38,7 @@ conda deactivate
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 16 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 date:${today_early_1} tablePart:32 expireDay:4 ifDebug:False \
-ifVideo:True ifWriteRedis:True savePathVideo:/dw/recommend/model/feature/video
+ifVideo:True ifWriteRedis:True savePathVideo:/dw/recommend/model/09_feature/video
 
 if [ $? -eq 1 ]; then
     echo "---------item写入redis执行失败---------"
@@ -65,17 +64,44 @@ while true; do
     exit 1
   fi
 done
+
+# 3 Check the mid upstream table's partition time
+while true; do
+  python_return_code=$(python utils.py --excute_program check_hive --partition ${today_early_1} --project loghubods --table mid_uid)
+  if [ $python_return_code -eq 0 ]; then
+    echo "Python程序返回0,退出循环。上游表loghubods.mid_uid=${today_early_1} 已生产完毕"
+    break
+  fi
+  echo "Python程序返回非0值,等待五分钟后再次调用。上游表loghubods.mid_uid=${today_early_1} 未完成"
+  sleep 300
+  current_hour=$(date +%H)
+  current_minute=$(date +%M)
+  if (( 10#$current_hour > 10#$max_hour || (10#$current_hour == 10#$max_hour && 10#$current_minute >= 10#$max_minute) )); then
+    echo "Maximum wait time reached, failing: ${current_hour}-${current_minute}"
+    exit 1
+  fi
+done
+
 conda deactivate
-# 3 user data production
+# 4 user data production
+#/opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
+#--class com.aliyun.odps.spark.examples.makedata.makedata_09_user2redis \
+#--name makedata_09_user2redis_${today} \
+#--master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
+#--conf spark.yarn.executor.memoryoverhead=1024 \
+#/root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
+#date:${today_early_1} tablePart:32 expireDay:3 ifDebug:False \
+#ifUser:True ifDeleteRedisUser:False ifWriteRedisUser:True sampleRate:1.0 midDays:7 \
+#savePathUser:/dw/recommend/model/feature/user/
+
 /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
---class com.aliyun.odps.spark.examples.makedata.makedata_09_user2redis \
---name makedata_09_user2redis_${today} \
---master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
+--class com.aliyun.odps.spark.examples.makedata.makedata_09_user2redis_freq \
+--name makedata_09_user2redis_freq \
+--master yarn --driver-memory 1G --executor-memory 5G --executor-cores 1 --num-executors 32 \
 --conf spark.yarn.executor.memoryoverhead=1024 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-date:${today_early_1} tablePart:32 expireDay:3 ifDebug:False \
-ifUser:True ifDeleteRedisUser:False ifWriteRedisUser:True sampleRate:1.0 midDays:7 \
-savePathUser:/dw/recommend/model/feature/user/
+date:${today_early_1} tablePart:64 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:100000000 \
+savePathUser:/dw/recommend/model/09_feature/user/
 
 if [ $? -eq 1 ]; then
     echo "---------user写入redis执行失败---------"

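The new mid_uid check repeats the poll-with-deadline pattern already used for the earlier partition checks. A minimal standalone sketch of that pattern, assuming utils.py prints 0 from check_hive exactly as invoked above, and that max_hour/max_minute are defined near the top of the real script (the values below are placeholders); 10# forces base-10 because date +%H zero-pads, and bash arithmetic would otherwise reject 08/09 as invalid octal:

    max_hour=11     # placeholder deadline; the real script sets these earlier
    max_minute=00

    wait_for_partition() {
        local project=$1 table=$2 partition=$3
        while true; do
            # check_hive prints 0 once the partition has been produced
            rc=$(python utils.py --excute_program check_hive --partition "${partition}" --project "${project}" --table "${table}")
            if [ "${rc}" -eq 0 ]; then
                echo "Upstream table ${project}.${table}=${partition} is ready"
                return 0
            fi
            echo "${project}.${table}=${partition} not ready, retrying in five minutes"
            sleep 300
            current_hour=$(date +%H)
            current_minute=$(date +%M)
            # give up once the daily deadline passes
            if (( 10#$current_hour > 10#$max_hour || (10#$current_hour == 10#$max_hour && 10#$current_minute >= 10#$max_minute) )); then
                echo "Maximum wait time reached, failing: ${current_hour}-${current_minute}"
                return 1
            fi
        done
    }

    wait_for_partition loghubods mid_uid "${today_early_1}" || exit 1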
+ 32 - 1
zhangbo/50_delete_hdfs.sh

@@ -33,4 +33,35 @@ if [ $? -eq 0 ]; then
     $HADOOP fs -rm -r -skipTrash ${path}
 else
     echo "${path} 数据不存在"
-fi
+fi
+
+
+
+function delete_path() {
+    if [ "$#" -ne 2 ]; then
+        echo "Usage: delete_path <early> <path>"
+        return 1
+    fi
+    early=$1
+    path=$2
+    date="$(date -d "${early} days ago" +%Y%m%d)"
+    path_delete=${path}${date}
+    $HADOOP fs -test -e ${path_delete}
+    if [ $? -eq 0 ]; then
+        echo "${path_delete} 数据存在, 删除。"
+        if $HADOOP fs -rm -r -skipTrash "${path_delete}"; then
+            echo "删除成功。"
+        else
+            echo "删除失败。"
+        fi
+    else
+        echo "${path_delete} 数据不存在"
+    fi
+}
+
+delete_path 7 /dw/recommend/model/11_str_data_v3/dt=
+delete_path 7 /dw/recommend/model/12_ros_data_v3/dt=
+delete_path 7 /dw/recommend/model/10_sample_data_v3/dt=
+delete_path 3 /dw/recommend/model/09_feature/user/all/dt=
+delete_path 3 /dw/recommend/model/09_feature/user/true/dt=
+delete_path 3 /dw/recommend/model/09_feature/video/dt=
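
For clarity, each call composes the target path from a retention offset and a prefix. A worked example, assuming the script runs on 20240108 (the date is illustrative):

    # delete_path 7 /dw/recommend/model/11_str_data_v3/dt=
    #   date="$(date -d '7 days ago' +%Y%m%d)"   -> 20240101
    #   path_delete=/dw/recommend/model/11_str_data_v3/dt=20240101
    #   removed via hadoop fs -rm -r -skipTrash, which bypasses the trash,
    #   so deleted partitions are unrecoverable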