Browse Source

feat:添加分桶脚本

zhaohaipeng 2 months ago
parent
commit
4fe06c96b5

+ 3 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_recsys/v20250218/makedata_recsys_43_bucketData_20250218.scala

@@ -36,9 +36,9 @@ object makedata_recsys_43_bucketData_20250218 {
 
     val loader = getClass.getClassLoader
     val resourceUrlBucket = loader.getResource(fileName)
-
     val buckets = FileUtils.readFile(resourceUrlBucket)
     println(buckets)
+
     val bucketsMap = buckets.split("\n")
       .map(r => r.replace(" ", "").replaceAll("\n", ""))
       .filter(r => r.nonEmpty)
@@ -52,7 +52,8 @@ object makedata_recsys_43_bucketData_20250218 {
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
       println("开始执行:" + date)
-      val data = sc.textFile(readPath + "/" + date + "*").map(r => {
+      println(readPath + "/" + date + "/*")
+      val data = sc.textFile(readPath + "/" + date + "/*").map(r => {
           val rList = r.split("\t")
           val logKey = rList(0)
           val labelKey = rList(1)