Prechádzať zdrojové kódy

增加测试两个表是否相同

xueyiming pred 2 mesiacmi
rodič
commit
0731e7cdd8

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/diff_data_20240718.scala

@@ -29,7 +29,7 @@ object diff_data_20240718 {
     val table = param.getOrElse("table", "ad_easyrec_train_data_v1")
     val partition = "dt=20250101"
 
-    val readPath = param.getOrElse("readPath", "/test/33_ad_train_data/20250213*")
+    val readPath = param.getOrElse("readPath", "/test/33_ad_train_data/20250216*")
     val savePath = param.getOrElse("savePath", "/test/diff")
 
     val data = sc.textFile(readPath)

+ 1 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_31_originData_hive_20240718.scala

@@ -65,7 +65,7 @@ object makedata_ad_31_originData_hive_20240718 {
     // 3 循环执行数据生产
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (dt <- dateRange) {
-      val timeRange = MyDateUtils.getDateHourRange(dt + "08", dt + "23")
+      val timeRange = MyDateUtils.getDateHourRange(dt + "08", dt + "08")
       val list = timeRange.map { dt_hh =>
           val dt = dt_hh.substring(0, 8)
           val hh = dt_hh.substring(8, 10)

+ 2 - 2
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketData_logKey_20240718.scala

@@ -48,8 +48,8 @@ object makedata_ad_33_bucketData_logKey_20240718 {
     val param = ParamUtils.parseArgs(args)
     val readPath = param.getOrElse("readPath", "/dw/recommend/model/31_ad_sample_data_v4/")
     val savePath = param.getOrElse("savePath", "/test/33_ad_train_data/")
-    val beginStr = param.getOrElse("beginStr", "20250213")
-    val endStr = param.getOrElse("endStr", "20250213")
+    val beginStr = param.getOrElse("beginStr", "20250216")
+    val endStr = param.getOrElse("endStr", "20250216")
     val repartition = param.getOrElse("repartition", "100").toInt
     val filterNames = param.getOrElse("filterNames", "").split(",").filter(_.nonEmpty).toSet
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")