xueyiming 2 months ago
Parent
Commit
7fa40b8388

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketData_hive_20240718.scala

@@ -53,7 +53,7 @@ object makedata_ad_33_bucketData_hive_20240718 {
 
     // 1 Read parameters
     val param = ParamUtils.parseArgs(args)
-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data_v4/")
     val beginStr = param.getOrElse("beginStr", "20250213")
     val endStr = param.getOrElse("endStr", "20250213")
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
@@ -65,7 +65,7 @@ object makedata_ad_33_bucketData_hive_20240718 {
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
       println("开始执行:" + date)
-      val data = sc.textFile(readPath + "/" + date + "*").map(r => {
+      val data = sc.textFile(readPath + "/" + date + "08").map(r => {
         val rList = r.split("\t")
         val logKey = rList(0)
         val labelKey = rList(1)

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketData_logKey_20240718.scala

@@ -57,7 +57,7 @@ object makedata_ad_33_bucketData_logKey_20240718 {
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
       println("开始执行:" + date)
-      val data = sc.textFile(readPath + "/" + date + "*").map(r=>{
+      val data = sc.textFile(readPath + "/" + date + "08").map(r=>{
         val rList = r.split("\t")
         val logKey = rList(0)
         val labelKey = rList(1)
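
Note on the textFile change in both files: below is a minimal, hypothetical Scala sketch (not part of the commit; the SparkSession setup and sample values are assumptions) illustrating how narrowing the path from the date wildcard to the fixed "08" suffix limits the input to a single hourly partition.

import org.apache.spark.sql.SparkSession

object ReadHourPartitionSketch {
  def main(args: Array[String]): Unit = {
    // Assumed Spark setup for illustration only.
    val spark = SparkSession.builder().appName("readHourPartitionSketch").getOrCreate()
    val sc = spark.sparkContext

    val readPath = "/dw/recommend/model/31_ad_sample_data_v4/" // new default from this commit
    val date = "20250213"

    // Old behaviour: glob every hourly file under the date prefix.
    // val data = sc.textFile(readPath + "/" + date + "*")

    // New behaviour: read only paths matching the hour-08 partition of the date.
    val data = sc.textFile(readPath + "/" + date + "08")

    println("record count for " + date + "08: " + data.count())
    spark.stop()
  }
}

With the wildcard, every hourly file for the day is read; with the fixed suffix, only paths starting with <date>08 are matched, so the downstream bucketing runs on one hour of samples.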