commit bdc6f8cced by zhangbo, 1 year ago

+ 3 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_09_user2redis_freq.scala

@@ -95,18 +95,18 @@ object makedata_09_user2redis_freq {
 
     //5 user split
     val savePathPart = savePathUser + "/all/" + partition
-    val userDataRead = sc.textFile(savePathPart).repartition(100).filter(_.split("\t").length >= 2)
+    val userDataRead = sc.textFile(savePathPart).filter(_.split("\t").length >= 2)
       .map(r => {
         val rList = r.split("\t")
         (rList(0), rList(1))
-      }).join(midRdd).map(r => (r._1, r._2._1, true))
+      }).join(midRdd).map(r => (r._1, r._2._1))
 //      .leftOuterJoin(midRdd).map {
 //        case (mid, (fea, Some(_))) =>
 //          (mid, fea, true)
 //        case (mid, (fea, None)) =>
 //          (mid, fea, false)
 //      }
-    val userDataReadTrue = userDataRead.filter(_._3).map(r => r._1 + "\t" + r._2)
+    val userDataReadTrue = userDataRead.map(r => r._1 + "\t" + r._2)
     // val userDataReadFalse = userDataRead.filter(!_._3).map(r => r._1 + "\t" + r._2)
     if (savePathUser.nonEmpty && savePathUser.startsWith("/dw/recommend/model/")) {
       val p1 = savePathUser + "/true/" + partition

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本 (ad-hoc scratch scripts)

@@ -12,7 +12,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-savePath:/dw/recommend/model/12_ros_data_v3/ beginStr:20240227 endStr:20240227 ifRepart:10 \
+savePath:/dw/recommend/model/12_ros_data_v3_test/ beginStr:20240228 endStr:20240228 ifRepart:10 \
 > p12_1.log 2>&1 &
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
@@ -38,5 +38,5 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
 --conf spark.yarn.executor.memoryOverhead=1024 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-date:20240228 tablePart:64 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
+date:20240229 tablePart:64 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
 savePathUser:/dw/recommend/model/09_feature/user/ > p09.log 2>&1 &
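
Both submits above pass parameters as `key:value` tokens (e.g. `date:20240229 tablePart:64`). A hypothetical sketch of how such tokens could be parsed; the actual parser inside these jobs is not shown in this diff. Splitting on the first `:` only keeps path values like `savePathUser:/dw/recommend/model/09_feature/user/` intact:

    // Hypothetical parser for key:value arguments; not the jobs' actual code.
    def parseArgs(args: Array[String]): Map[String, String] =
      args.map { arg =>
        val Array(k, v) = arg.split(":", 2) // split on the first ':' only
        k -> v
      }.toMap

    // e.g. parseArgs(Array("date:20240229", "tablePart:64"))("tablePart") == "64"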

+ 1 - 1
zhangbo/05_update_everyday_2model.sh

@@ -148,4 +148,4 @@ fi
 $HADOOP fs -put ${MODEL_PATH}/${model_name}_${today_early_1}_change.txt ${online_model_path}
 
 
-# nohup sh 05_update_everyday_2model.sh > p05.log 2>&1 &
+# nohup sh 05_update_everyday_2model.sh > p5.log 2>&1 &

+ 1 - 1
zhangbo/06_update_everyday_feature.sh

@@ -110,4 +110,4 @@ else
     echo "---------user写入redis执行成功---------"
 fi
 
-#nohup sh 06_update_everyday_feature.sh > p2.log 2>&1 &
+#nohup sh 06_update_everyday_feature.sh > p6.log 2>&1 &