Parcourir la source

写入测试数据

xueyiming il y a 2 mois
Parent
commit
ae7f5dc1d4

+ 6 - 4
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20240718.scala

@@ -24,6 +24,12 @@ object diff_data_20240718 {
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "ad_easyrec_train_data_v1")
     val partition = "dt=20250101"
+
+    val readPath = param.getOrElse("readPath", "/test/33_ad_train_data/20250101")
+    val data = sc.textFile(readPath)
+
+
+
     // 2 读取odps+表信息
     val odpsOps = env.getODPS(sc)
     val odpsData = odpsOps.readTable(project = project,
@@ -31,10 +37,6 @@ object diff_data_20240718 {
       partition = partition,
       transfer = func,
       numPartition = 64)
-    val randomRow = odpsData.takeSample(withReplacement = false, num = 10)
-    for (cc <- randomRow) {
-      println(cc)
-    }
   }
 
   def func(record: Record, schema: TableSchema): Map[String, String] = {

+ 3 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketData_hive_20240718.scala

@@ -54,13 +54,13 @@ object makedata_ad_33_bucketData_hive_20240718 {
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
     val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/")
-    val beginStr = param.getOrElse("beginStr", "20240620")
-    val endStr = param.getOrElse("endStr", "20240620")
+    val beginStr = param.getOrElse("beginStr", "20250213")
+    val endStr = param.getOrElse("endStr", "20250213")
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "ad_easyrec_train_data_v1")
-    val partition = "dt=20250101"
+    val partition = param.getOrElse("partition", "dt=20250101")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {

+ 3 - 3
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketData_logKey_20240718.scala

@@ -46,10 +46,10 @@ object makedata_ad_33_bucketData_logKey_20240718 {
 
     // 1 读取参数
     val param = ParamUtils.parseArgs(args)
-    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data/")
+    val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data_v4/")
     val savePath = param.getOrElse("savePath", "/test/33_ad_train_data/")
-    val beginStr = param.getOrElse("beginStr", "20240620")
-    val endStr = param.getOrElse("endStr", "20240620")
+    val beginStr = param.getOrElse("beginStr", "20250213")
+    val endStr = param.getOrElse("endStr", "20250213")
     val repartition = param.getOrElse("repartition", "100").toInt
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")