zhangbo 10 months ago
parent
commit
30978b93b4

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_15_bucket_20240608.scala

@@ -49,7 +49,7 @@ object makedata_15_bucket_20240608 {
       val rList = r.split("\t")
       val doubles = rList(2).split(",").map(_.toDouble)
       doubles
-    }).sample(false, sampleRate )
+    }).sample(false, sampleRate ).repartition(20)
 
     val result = new ArrayBuffer[String]()
 

+ 5 - 5
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -72,9 +72,9 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 16 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 tablePart:32 \
-beginStr:2024060706 endStr:2024060715 \
+beginStr:2024060716 endStr:2024060723 \
 table:alg_recsys_sample_all \
-> p13_data060706.log 2>&1 &
+> p13_data060716.log 2>&1 &
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
@@ -90,9 +90,9 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
 --conf spark.driver.maxResultSize=16G \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200 \
-bucketNum:200 sampleRate:0.1 \
-> p15_data.log 2>&1 &
+readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200_v2 \
+bucketNum:200 sampleRate:0.01 \
+> p15_data2.log 2>&1 &
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \