commit 729f78859d
Author: zhangbo
Date:   1 year ago

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_09_user2redis_freq.scala

@@ -95,7 +95,7 @@ object makedata_09_user2redis_freq {
 
     //5 Split users
     val savePathPart = savePathUser + "/all/" + partition
-    val userDataRead = sc.textFile(savePathPart).filter(_.split("\t").length >= 2)
+    val userDataRead = sc.textFile(savePathPart).repartition(100).filter(_.split("\t").length >= 2)
       .map(r => {
         val rList = r.split("\t")
         (rList(0), rList(1))
@@ -129,7 +129,7 @@ object makedata_09_user2redis_freq {
       val count = userDataRead.count()
       println("待写入数据有:" + count)
       if (count > redisLimit) {
-        println("Data volume exceeds 200 million; skipping write.")
+        println(s"Data volume exceeds ${redisLimit}; skipping write.")
       } else {
         val userDataTakeRddRun = userDataRead.mapPartitions(row => {
           val redisFormat = new util.HashMap[String, String]
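
Net effect of these two hunks: the read fans out across 100 tasks before the count and the Redis write, and the refusal message now interpolates whatever redisLimit was passed on the command line instead of a hardcoded "200 million". A minimal self-contained sketch of the resulting pattern, assuming the job structure around the hunks (the SparkContext setup and the write body are not part of this diff):

import org.apache.spark.{SparkConf, SparkContext}

object User2RedisSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("user2redis-sketch"))
    val savePathPart = "/dw/recommend/model/09_feature/user/all/20240227" // assumed path layout
    val redisLimit = 50000000L                                            // matches the script below

    val userDataRead = sc.textFile(savePathPart)
      .repartition(100)                  // the new call: spread the rows before downstream stages
      .filter(_.split("\t").length >= 2) // drop malformed rows
      .map { r =>
        val rList = r.split("\t")
        (rList(0), rList(1))             // (key, value) pairs destined for Redis
      }

    val count = userDataRead.count()
    println("Rows pending write: " + count)
    if (count > redisLimit) {
      // the fixed message: report the configured cap, not a stale constant
      println(s"Data volume exceeds ${redisLimit}; skipping write.")
    } else {
      // per-partition Redis write goes here, as in the original file
    }
    sc.stop()
  }
}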

+ 5 - 6
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -7,12 +7,12 @@ tablePart:64 savePath:/dw/recommend/model/10_sample_data_v3/ beginStr:20240227 e
 
 
 
-
+[ros sample generation]
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_12_rosData_v3 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-savePath:/dw/recommend/model/12_ros_data_v3/ beginStr:20240222 endStr:20240226 ifRepart:10 \
+savePath:/dw/recommend/model/12_ros_data_v3/ beginStr:20240226 endStr:20240227 ifRepart:10 \
 > p12_1.log 2>&1 &
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
@@ -22,13 +22,12 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 savePath:/dw/recommend/model/12_ros_data_v3_noweight/ beginStr:20240222 endStr:20240226 ifRepart:10 \
 > p12_2.log 2>&1 &
 
-
-
+[str sample generation]
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_11_strData_v3 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 64 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-savePath:/dw/recommend/model/11_str_data_v3/ beginStr:20240222 endStr:20240225 ifRepart:100 \
+savePath:/dw/recommend/model/11_str_data_v3/ beginStr:20240226 endStr:20240227 ifRepart:100 \
 > p11.log 2>&1 &
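
All of these jobs receive their parameters as key:value tokens (savePath:, beginStr:, endStr:, ifRepart:, ...). The parsing code is not part of this commit, so the following is only a plausible sketch of how such tokens could become a lookup map, splitting on the first ':' so that values beginning with '/' paths stay intact:

def parseArgs(args: Array[String]): Map[String, String] =
  args.map { kv =>
    val idx = kv.indexOf(':')
    kv.take(idx) -> kv.drop(idx + 1)   // split only on the first ':'
  }.toMap

val param = parseArgs(Array("beginStr:20240226", "endStr:20240227", "ifRepart:100"))
val ifRepart = param.getOrElse("ifRepart", "0").toInt  // hypothetical default: no repartition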
 
 
@@ -39,5 +38,5 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 4G --executor-cores 1 --num-executors 32 \
 --conf spark.yarn.executor.memoryoverhead=1024 \
 /root/zhangbo/recommend-emr-dataprocess/target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-date:20240227 tablePart:64 expireDay:3 ifWriteRedisUser:False ifUser:False midDays:7 redisLimit:100000000 \
+date:20240227 tablePart:64 expireDay:3 ifWriteRedisUser:True ifUser:False midDays:15 redisLimit:50000000 \
 savePathUser:/dw/recommend/model/09_feature/user/ > p09.log 2>&1 &
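
This last invocation is the consumer of the Scala change above: ifWriteRedisUser flips to True, midDays widens from 7 to 15, and redisLimit halves from 100000000 to 50000000, which is exactly why the hardcoded "200 million" message had to become interpolated. The first hunk shows the job building a util.HashMap per partition; the sketch below is a hedged illustration only (endpoint, key scheme, and the use of setex are assumptions; expireDay:3 mirrors the flag above):

import redis.clients.jedis.Jedis

userDataRead.foreachPartition { rows =>
  val jedis = new Jedis("redis-host", 6379)  // assumed endpoint
  val pipeline = jedis.pipelined()
  val ttlSeconds = 3 * 24 * 3600             // expireDay:3 expressed in seconds
  rows.foreach { case (mid, feature) =>
    pipeline.setex("user:" + mid, ttlSeconds, feature)  // hypothetical key scheme
  }
  pipeline.sync()                            // flush the pipelined commands
  jedis.close()
}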