Browse Source

repart 更新

zhangbo 11 months ago
parent
commit
5945939935

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_15_bucket_20240608.scala

@@ -49,13 +49,13 @@ object makedata_15_bucket_20240608 {
       val rList = r.split("\t")
       val doubles = rList(2).split(",").map(_.toDouble)
       doubles
-    }).sample(false, sampleRate).collect()
+    }).sample(false, sampleRate )
 
     val result = new ArrayBuffer[String]()
 
     for (i <- contentList.indices){
       println("特征:" + contentList(i))
-      val data2 = data1.map(r => r(i)).filter(_ > 1E-8).sorted
+      val data2 = data1.map(r => r(i)).filter(_ > 1E-8).collect().sorted
       val len = data2.length
       val oneBucketNum = (len - 1) / (bucketNum - 1) + 1 // 确保每个桶至少有一个元素
       val buffers = new ArrayBuffer[Double]()

+ 3 - 2
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -87,10 +87,11 @@ beginStr:20240606 endStr:20240606 repartition:1000 \
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
---master yarn --driver-memory 32G --executor-memory 1G --executor-cores 1 --num-executors 32 \
+--master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 16 \
+--conf spark.driver.maxResultSize=16G \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200 \
-bucketNum:200 \
+bucketNum:200 sampleRate:0.1 \
 > p15_data.log 2>&1 &