@@ -3,14 +3,11 @@ package com.aliyun.odps.spark.examples.makedata_ad.v20240718
 import com.alibaba.fastjson.JSON
 import com.aliyun.odps.TableSchema
 import com.aliyun.odps.data.Record
-import com.aliyun.odps.spark.examples.makedata_ad.v20240718.makedata_ad_32_bucket_hive_test.write
-import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, MyHdfsUtils, ParamUtils, env}
+import com.aliyun.odps.spark.examples.myUtils.{MyDateUtils, ParamUtils, env}
 import examples.extractor.ExtractorUtils
-import org.apache.hadoop.io.compress.GzipCodec
 import org.apache.spark.sql.SparkSession
 
 import scala.collection.JavaConversions._
-import scala.collection.mutable.ArrayBuffer
 import scala.io.Source
 
 /*
@@ -60,7 +57,7 @@ object makedata_ad_33_bucketData_hive_20240718 {
     val whatLabel = param.getOrElse("whatLabel", "ad_is_conversion")
     val project = param.getOrElse("project", "loghubods")
     val table = param.getOrElse("table", "ad_easyrec_train_data_v1")
-    val partition = param.getOrElse("partition", "dt=20250101")
+    val partition = param.getOrElse("partition", "dt=20250208")
 
     val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
     for (date <- dateRange) {
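
Note (not part of the diff): the new dt=20250208 value is only a fallback; jobs that pass an explicit partition argument are unaffected. A minimal illustration of the getOrElse behavior, with a hypothetical param map standing in for whatever ParamUtils parses from the job arguments:

    // Hypothetical params; in the job these come from ParamUtils
    val param = Map("project" -> "loghubods")
    val partition = param.getOrElse("partition", "dt=20250208") // falls back to the new default
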
@@ -107,7 +104,7 @@ object makedata_ad_33_bucketData_hive_20240718 {
       }
 
       // 4 hive
-      odpsOps.saveToTable(project, table, partition, list, write, defaultCreate = true)
+      odpsOps.saveToTable(project, table, partition, list, write, defaultCreate = true, overwrite = true)
     }
 
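
Note (not part of the diff): since the import of makedata_ad_32_bucket_hive_test.write was dropped in the first hunk, a local write transfer function is presumably defined elsewhere in this object. A minimal sketch of what such a function could look like, assuming the RDD elements are column-name -> value maps; the body here is hypothetical, only the (element, Record, TableSchema) => Unit shape is implied by the saveToTable call:

    // Hypothetical transfer function; copies one RDD element into an ODPS Record
    // following the destination table's schema.
    def write(row: Map[String, String], record: Record, schema: TableSchema): Unit = {
      for (column <- schema.getColumns) { // iterable via JavaConversions._
        record.set(column.getName, row.getOrElse(column.getName, null))
      }
    }

With overwrite = true, re-running the job for the same dt partition should replace its contents rather than append to them, making daily backfills idempotent; defaultCreate = true keeps the existing behavior of creating the table/partition if it does not yet exist.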