zhangbo committed 10 months ago · commit cc1aa739bb

+ 2 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_13_originData_20240529.scala

@@ -31,6 +31,7 @@ object makedata_13_originData_20240529 {
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/13_sample_data/")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "XXXX")
+    val repartition = param.getOrElse("repartition", "100").toInt
 
     // 2 读取odps+表信息
     val odpsOps = env.getODPS(sc)
@@ -245,7 +246,7 @@ object makedata_13_originData_20240529 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        odpsData.saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        odpsData.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       }else{
         println("路径不合法,无法写入:" + hdfsPath)
       }

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_14_valueData_20240608.scala

@@ -81,7 +81,7 @@ object makedata_14_valueData_20240608 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        data1.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        data1.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       } else {
         println("路径不合法,无法写入:" + hdfsPath)
       }

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -71,7 +71,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
 --master yarn --driver-memory 1G --executor-memory 2G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-tablePart:32 repartition:32 \
+tablePart:64 repartition:32 \
 beginStr:2024060712 endStr:2024060716 \
 table:alg_recsys_sample_all \
 > p13_data060712.log 2>&1 &
@@ -81,7 +81,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
 --master yarn --driver-memory 1G --executor-memory 3G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240606 repartition:200 \
+beginStr:20240607 endStr:20240607 repartition:200 \
 > p14_data.log 2>&1 &
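
Both commands pass job parameters as key:value tokens (tablePart:64, repartition:32, beginStr:2024060712, ...), which the driver reads back through param.getOrElse(...). The repo's actual argument parser is not shown in this diff; the snippet below is only a plausible sketch of how such tokens could be turned into that map.

// Hedged sketch: convert "key:value" program arguments into Map[String, String].
// The project presumably has its own parameter helper; this is illustrative only.
def parseParams(args: Array[String]): Map[String, String] =
  args
    .map(_.split(":", 2))                    // split on the first ':' only
    .collect { case Array(k, v) => k -> v }  // keep well-formed key:value pairs
    .toMap

// parseParams(Array("tablePart:64", "repartition:32", "beginStr:2024060712"))
//   .getOrElse("repartition", "100").toInt   // -> 32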