Procházet zdrojové kódy

从hive输入到hive输出

xueyiming před 2 měsíci
rodič
revize
52a33b2faa

+ 6 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_hive_20240718.scala

@@ -1,7 +1,8 @@
 package com.aliyun.odps.spark.examples.makedata_ad.v20240718
 
 import com.alibaba.fastjson.{JSON, JSONObject}
-import com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_31_originData_20240718.func
+import com.aliyun.odps.TableSchema
+import com.aliyun.odps.data.Record
 import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, ParamUtils, env}
 import examples.extractor.{ExtractorUtils, RankExtractorFeature_20240530}
 import examples.utils.{AdUtil, DateTimeUtil}
@@ -47,10 +48,11 @@ object makedata_ad_31_originData_hive_20240718 {
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
     val tablePart = param.getOrElse("tablePart", "64").toInt
-    val beginStr = param.getOrElse("beginStr", "2024062008")
-    val endStr = param.getOrElse("endStr", "2024062023")
+    val beginStr = param.getOrElse("beginStr", "20250216")
+    val endStr = param.getOrElse("endStr", "20250216")
     val project = param.getOrElse("project", "loghubods")
     val inputTable = param.getOrElse("inputTable", "alg_recsys_ad_sample_all")
+    val outputTable = param.getOrElse("outputTable", "ad_easyrec_train_data_v1")
     val filterHours = param.getOrElse("filterHours", "00,01,02,03,04,05,06,07").split(",").toSet
     val idDefaultValue = param.getOrElse("idDefaultValue", "1.0").toDouble
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
@@ -519,7 +521,7 @@ object makedata_ad_31_originData_hive_20240718 {
         }
 
       val partition = s"dt=$dt"
-      odpsOps.saveToTable(project, table, partition, list, write, defaultCreate = true, overwrite = true)
+      odpsOps.saveToTable(project, outputTable, "dt=20250101", list, write, defaultCreate = true, overwrite = true)
     }
   }