zhangbo 1 year ago
parent commit 20dd8ca9ad

+ 2 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_09_user2redis_freq.scala

@@ -23,6 +23,7 @@ object makedata_09_user2redis_freq {
       .appName(this.getClass.getName)
       .getOrCreate()
     val sc = spark.sparkContext
+    sc.setCheckpointDir("/dw/recommend/model/99_zhangbo_checkpoint/")
 
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
@@ -92,7 +93,6 @@ object makedata_09_user2redis_freq {
       .reduceByKey((a, b) => Math.max(a.toLong, b.toLong).toString)
       .filter(r => DateUtils.parseDate(date, Array[String]("yyyyMMdd")).getTime / 1000 - r._2.toLong / 1000 < 3600 * 24 * midDays)
     println("------------mid处理完毕,近期保留的用户有:" + midRdd.count() + "------------------")
-
     //5 用户区分
     val savePathPart = savePathUser + "/all/" + partition
     val userDataRead = sc.textFile(savePathPart).filter(_.split("\t").length >= 2)
@@ -100,6 +100,7 @@ object makedata_09_user2redis_freq {
         val rList = r.split("\t")
         (rList(0), rList(1))
       }).join(midRdd).map(r => (r._1, r._2._1))
+    userDataRead.checkpoint()
 //      .leftOuterJoin(midRdd).map {
 //        case (mid, (fea, Some(_))) =>
 //          (mid, fea, true)

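The two added lines in this file work as a pair: `sc.setCheckpointDir(...)` must be set on the SparkContext before any RDD calls `checkpoint()`, and the checkpoint is only materialized when a later action runs on `userDataRead`, at which point the join result is written to the checkpoint directory and the RDD's lineage is truncated so downstream stages do not recompute the join. A minimal self-contained sketch of the same pattern (the path and sample data are illustrative, not from this repo):

import org.apache.spark.sql.SparkSession

object CheckpointSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().appName(this.getClass.getName).getOrCreate()
    val sc = spark.sparkContext
    // Must be a fault-tolerant path (e.g. HDFS) on a real cluster, and must
    // be set before checkpoint() is called on any RDD.
    sc.setCheckpointDir("/tmp/checkpoint_sketch/") // illustrative path

    val userData = sc.parallelize(Seq(("mid1", "fea1"), ("mid2", "fea2")))
    val midRdd = sc.parallelize(Seq(("mid1", "1709337600000")))

    // Keep only users present in midRdd, mirroring the join in the diff above.
    val joined = userData.join(midRdd).map(r => (r._1, r._2._1))

    // Mark for checkpointing; the actual write happens at the next action
    // (count), after which the RDD's lineage is truncated.
    joined.checkpoint()
    println(joined.count())

    spark.stop()
  }
}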
+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -38,5 +38,5 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
 --conf spark.yarn.executor.memoryOverhead=1024 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-date:20240229 tablePart:64 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
+date:20240302 tablePart:96 expireDay:3 ifWriteRedisUser:True ifUser:True midDays:14 redisLimit:80000000 \
 savePathUser:/dw/recommend/model/09_feature/user/ > p09.log 2>&1 &
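For reference, the job's command-line parameters are plain `key:value` tokens (the diff bumps `date` forward to 20240302 and raises `tablePart` from 64 to 96, presumably to increase the table read parallelism). Below is a minimal sketch of how such colon-separated arguments could be parsed; the repo's actual `ParamUtils.parseArgs` may differ:

object ArgSketch {
  // Hypothetical stand-in for ParamUtils.parseArgs: split each token on the
  // first ':' only, so values that are paths (containing '/') survive intact.
  def parseArgs(args: Array[String]): Map[String, String] =
    args.map { a =>
      val i = a.indexOf(':')
      (a.substring(0, i), a.substring(i + 1))
    }.toMap

  def main(args: Array[String]): Unit = {
    val demo = Array("date:20240302", "tablePart:96",
      "savePathUser:/dw/recommend/model/09_feature/user/")
    val param = parseArgs(demo)
    // getOrElse supplies a default when a key is absent from the command line.
    println(param.getOrElse("tablePart", "32")) // prints 96
  }
}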