|
@@ -69,6 +69,7 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
|
|
|
}).toMap
|
|
|
val bucketsMap_br = sc.broadcast(bucketsMap)
|
|
|
val denseFeatureNames = bucketsMap.keySet
|
|
|
+ val lowerCaseDenseFeatureNames = bucketsMap.keySet.map(_.toLowerCase)
|
|
|
val sparseFeatureNames = Set(
|
|
|
"cid", "adid", "adverid", "targeting_conversion",
|
|
|
"region", "city", "brand",
|
|
@@ -90,6 +91,29 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
|
|
|
|
|
|
// 2 读取odps+表信息
|
|
|
val odpsOps = env.getODPS(sc)
|
|
|
+
|
|
|
+ val tableSchema = odpsOps.getTableSchema(project, outputTable, isPartition = false)
|
|
|
+
|
|
|
+ // 检查所有字段,收集非法字段
|
|
|
+ val invalidFields = tableSchema.flatMap { case (fieldName, _) =>
|
|
|
+ // 跳过 has_click 和 has_conversion 列
|
|
|
+ if (fieldName != "has_click" && fieldName != "has_conversion") {
|
|
|
+ if (!lowerCaseDenseFeatureNames.contains(fieldName) && !sparseFeatureNames.contains(fieldName)) {
|
|
|
+ Some(fieldName) // 收集缺少字段
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ None
|
|
|
+ }
|
|
|
+ }.toList
|
|
|
+
|
|
|
+ // 如果存在非法字段,抛出标准异常
|
|
|
+ if (invalidFields.nonEmpty) {
|
|
|
+ throw new IllegalArgumentException(s"缺少字段: ${invalidFields.mkString(", ")}")
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
// 3 循环执行数据生产
|
|
|
val dateRange = MyDateUtils.getDateRange(beginStr, endStr)
|
|
|
for (dt <- dateRange) {
|