zhangbo committed 10 months ago
parent commit 0ad2db1260

+ 1 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_13_originData_20240529.scala

@@ -31,7 +31,6 @@ object makedata_13_originData_20240529 {
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/13_sample_data/")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "XXXX")
-    val repartition = param.getOrElse("repartition", "10").toInt
 
     // 2 Read the ODPS table info
     val odpsOps = env.getODPS(sc)
@@ -246,7 +245,7 @@ object makedata_13_originData_20240529 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        odpsData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        odpsData.saveAsTextFile(hdfsPath, classOf[GzipCodec])
       }else{
         println("路径不合法,无法写入:" + hdfsPath)
       }
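
Note: with the explicit repartition removed, the write now simply inherits however many partitions the upstream ODPS read produced. A minimal sketch of the before/after behavior (local Spark with stand-in data; none of these names come from the repo):

import org.apache.spark.{SparkConf, SparkContext}

object RepartitionWriteSketch {
  def main(args: Array[String]): Unit = {
    val sc  = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[4]"))
    // local stand-in for the ODPS read; 64 slices mirrors tablePart:64 below
    val rdd = sc.parallelize(1 to 1000, numSlices = 64)

    // old behavior: full shuffle down to a fixed 10 output files
    println(rdd.repartition(10).getNumPartitions)  // 10

    // new behavior: one output file per upstream partition
    println(rdd.getNumPartitions)                  // 64

    sc.stop()
  }
}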

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_14_valueData_20240608.scala

@@ -82,7 +82,7 @@ object makedata_14_valueData_20240608 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        data1.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        data1.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       } else {
         println("路径不合法,无法写入:" + hdfsPath)
       }
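
Note: repartition(n) is defined as coalesce(n, shuffle = true), so switching to coalesce(repartition) still caps the number of output files but avoids the full shuffle when only narrowing partitions. A minimal sketch of the difference, with assumed stand-in data:

import org.apache.spark.{SparkConf, SparkContext}

object CoalesceSketch {
  def main(args: Array[String]): Unit = {
    val sc   = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[4]"))
    val wide = sc.parallelize(1 to 100000, numSlices = 2000)

    // repartition(1000) == coalesce(1000, shuffle = true): every row moves
    val shuffled = wide.repartition(1000)

    // coalesce(1000) only merges existing partitions; no shuffle stage
    val merged = wide.coalesce(1000)

    println((shuffled.getNumPartitions, merged.getNumPartitions))  // (1000,1000)
    sc.stop()
  }
}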

+ 6 - 6
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -71,23 +71,23 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-tablePart:32 \
-beginStr:2024060600 endStr:2024060723 \
+tablePart:64 \
+beginStr:2024060700 endStr:2024060723 \
 table:alg_recsys_sample_all \
-> p13_data.log 2>&1 &
+> p13_data0607.log 2>&1 &
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240607 \
+beginStr:20240606 endStr:20240607 repartition:1000 \
 > p14_data.log 2>&1 &
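
The new repartition:1000 argument presumably reaches the job as a key:value pair and is read back through the param.getOrElse("repartition", ...) pattern visible in the first hunk. A hedged sketch of that plumbing; parseArgs is a hypothetical helper, not the repo's actual parser:

object ParseArgsSketch {
  // hypothetical helper: folds "key:value" CLI args into a Map
  def parseArgs(args: Array[String]): Map[String, String] =
    args.flatMap { arg =>
      arg.split(":", 2) match {
        case Array(k, v) => Some(k -> v)
        case _           => None
      }
    }.toMap

  def main(args: Array[String]): Unit = {
    val param = parseArgs(Array("beginStr:20240606", "endStr:20240607", "repartition:1000"))
    // same lookup pattern as in the hunks above
    val repartition = param.getOrElse("repartition", "10").toInt
    println(repartition)  // 1000
  }
}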
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
---master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 32 \
+--master yarn --driver-memory 32G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200 \
 bucketNum:200 \
@@ -98,7 +98,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240607 \
+beginStr:20240606 endStr:20240607 repartition:1000 \
 > p16_data.log 2>&1 &