zhangbo committed 10 months ago
parent commit 0ad2db1260

+ 1 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_13_originData_20240529.scala

@@ -31,7 +31,6 @@ object makedata_13_originData_20240529 {
     val savePath = param.getOrElse("savePath", "/dw/recommend/model/13_sample_data/")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "XXXX")
-    val repartition = param.getOrElse("repartition", "10").toInt
 
     // 2 Read the ODPS table info
     val odpsOps = env.getODPS(sc)
@@ -246,7 +245,7 @@ object makedata_13_originData_20240529 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")){
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        odpsData.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        odpsData.saveAsTextFile(hdfsPath, classOf[GzipCodec])
       }else{
         println("路径不合法,无法写入:" + hdfsPath)
       }
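
Note: with the explicit repartition removed, the write now simply inherits however many partitions the upstream ODPS read produced. A minimal sketch of the before/after behavior (local Spark with stand-in data; none of these names come from the repo):

import org.apache.spark.{SparkConf, SparkContext}

object RepartitionWriteSketch {
  def main(args: Array[String]): Unit = {
    val sc  = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[4]"))
    // local stand-in for the ODPS read; 64 slices mirrors tablePart:64 below
    val rdd = sc.parallelize(1 to 1000, numSlices = 64)

    // old behavior: full shuffle down to a fixed 10 output files
    println(rdd.repartition(10).getNumPartitions)  // 10

    // new behavior: one output file per upstream partition
    println(rdd.getNumPartitions)                  // 64

    sc.stop()
  }
}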

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_14_valueData_20240608.scala

@@ -82,7 +82,7 @@ object makedata_14_valueData_20240608 {
       if (hdfsPath.nonEmpty && hdfsPath.startsWith("/dw/recommend/model/")) {
         println("删除路径并开始数据写入:" + hdfsPath)
         MyHdfsUtils.delete_hdfs_path(hdfsPath)
-        data1.repartition(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
+        data1.coalesce(repartition).saveAsTextFile(hdfsPath, classOf[GzipCodec])
       } else {
         println("路径不合法,无法写入:" + hdfsPath)
       }
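
Note: repartition(n) is defined as coalesce(n, shuffle = true), so switching to coalesce(repartition) still caps the number of output files but avoids the full shuffle when only narrowing partitions. A minimal sketch of the difference, with assumed stand-in data:

import org.apache.spark.{SparkConf, SparkContext}

object CoalesceSketch {
  def main(args: Array[String]): Unit = {
    val sc   = new SparkContext(new SparkConf().setAppName("sketch").setMaster("local[4]"))
    val wide = sc.parallelize(1 to 100000, numSlices = 2000)

    // repartition(1000) == coalesce(1000, shuffle = true): every row moves
    val shuffled = wide.repartition(1000)

    // coalesce(1000) only merges existing partitions; no shuffle stage
    val merged = wide.coalesce(1000)

    println((shuffled.getNumPartitions, merged.getNumPartitions))  // (1000,1000)
    sc.stop()
  }
}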

+ 6 - 6
src/main/scala/com/aliyun/odps/spark/examples/临时记录的脚本

@@ -71,23 +71,23 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_13_originData_20240529 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-tablePart:32 \
-beginStr:2024060600 endStr:2024060723 \
+tablePart:64 \
+beginStr:2024060700 endStr:2024060723 \
 table:alg_recsys_sample_all \
-> p13_data.log 2>&1 &
+> p13_data0607.log 2>&1 &
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_14_valueData_20240608 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240607 \
+beginStr:20240606 endStr:20240607 repartition:1000 \
 > p14_data.log 2>&1 &
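
The new repartition:1000 argument presumably reaches the job as a key:value pair and is read back through the param.getOrElse("repartition", ...) pattern visible in the first hunk. A hedged sketch of that plumbing; parseArgs is a hypothetical helper, not the repo's actual parser:

object ParseArgsSketch {
  // hypothetical helper: folds "key:value" CLI args into a Map
  def parseArgs(args: Array[String]): Map[String, String] =
    args.flatMap { arg =>
      arg.split(":", 2) match {
        case Array(k, v) => Some(k -> v)
        case _           => None
      }
    }.toMap

  def main(args: Array[String]): Unit = {
    val param = parseArgs(Array("beginStr:20240606", "endStr:20240607", "repartition:1000"))
    // same lookup pattern as in the hunks above
    val repartition = param.getOrElse("repartition", "10").toInt
    println(repartition)  // 1000
  }
}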
 
 
 nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.spark.deploy.SparkSubmit \
 --class com.aliyun.odps.spark.examples.makedata.makedata_15_bucket_20240608 \
---master yarn --driver-memory 16G --executor-memory 1G --executor-cores 1 --num-executors 32 \
+--master yarn --driver-memory 32G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
 readPath:/dw/recommend/model/14_feature_data/20240606/ fileName:20240606_200 \
 bucketNum:200 \
@@ -98,7 +98,7 @@ nohup /opt/apps/SPARK2/spark-2.4.8-hadoop3.2-1.0.8/bin/spark-class2 org.apache.s
 --class com.aliyun.odps.spark.examples.makedata.makedata_16_bucketData_20240609 \
 --master yarn --driver-memory 1G --executor-memory 1G --executor-cores 1 --num-executors 32 \
 ./target/spark-examples-1.0.0-SNAPSHOT-shaded.jar \
-beginStr:20240606 endStr:20240607 \
+beginStr:20240606 endStr:20240607 repartition:1000 \
 > p16_data.log 2>&1 &