Przeglądaj źródła

推荐样本生产-特征分桶

zhangbo 10 miesięcy temu
rodzic
commit
570a9138d9

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_15_bucket_20240608.scala

@@ -42,7 +42,7 @@ object makedata_15_bucket_20240608 {
     val date = param.getOrElse("date", "20240607")
     val readPath = param.getOrElse("readPath", "/dw/recommend/model/14_feature_data/")
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/15_bucket_data/")
-    val bucketNum = param.getOrElse("bucketNum", "999").toInt
+    val bucketNum = param.getOrElse("bucketNum", "200").toInt
 
     val data = sc.textFile(readPath + partitionPrefix)
     val data1 = data.map(r => {

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -89,5 +89,5 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
 --master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-partitionPrefix:20240607 date:20240607_bucket \
+partitionPrefix:20240607 date:20240607_200 bucketNum:200 \
 > p15_data.log 2>&1 &