浏览代码

Merge branch 'dev-xym-add-features' of algorithm/recommend-emr-dataprocess into feature/20250104-zt-update

xueyiming 3 天之前
父节点
当前提交
7f2d2f6479

+ 33 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250228.scala

@@ -78,7 +78,13 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
       "user_vid_return_tags_14d", "apptype", "ts", "mid", "pqtid", "hour", "hour_quarter", "root_source_scene",
       "root_source_channel", "is_first_layer", "title_split", "profession", "user_vid_share_tags_1d", "user_vid_share_tags_14d",
       "user_vid_return_cate1_14d", "user_vid_return_cate2_14d", "user_vid_share_cate1_14d", "user_vid_share_cate2_14d",
-      "creative_type", "creative_hook_embedding", "creative_why_embedding", "creative_action_embedding","user_has_conver_1y")
+      "creative_type", "creative_hook_embedding", "creative_why_embedding", "creative_action_embedding", "user_has_conver_1y",
+      "user_adverid_view_3d", "user_adverid_view_7d", "user_adverid_view_30d",
+      "user_adverid_click_3d", "user_adverid_click_7d", "user_adverid_click_30d",
+      "user_adverid_conver_3d", "user_adverid_conver_7d", "user_adverid_conver_30d",
+      "user_skuid_view_3d", "user_skuid_view_7d", "user_skuid_view_30d",
+      "user_skuid_click_3d", "user_skuid_click_7d", "user_skuid_click_30d",
+      "user_skuid_conver_3d", "user_skuid_conver_7d", "user_skuid_conver_30d")
 
 
     // 2 读取odps+表信息
@@ -470,6 +476,32 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
                 featureMap.put("user_vid_share_cate2_14d", g2.getString("cate2_14d"))
               }
 
+
+              val h1: JSONObject = if (record.isNull("h1_feature")) new JSONObject() else JSON.parseObject(record.getString("h1_feature"))
+              val h2: JSONObject = if (record.isNull("h2_feature")) new JSONObject() else JSON.parseObject(record.getString("h2_feature"))
+
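+              // Time windows to extract; each h1/h2 value is split on "," into view, click, conver counts (h1 -> user_adverid_*, h2 -> user_skuid_*)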
+              val timeDimensions = Seq("3d", "7d", "30d")
+              for (dimension <- timeDimensions) {
+                if (h1.containsKey(dimension) && h1.getString(dimension).nonEmpty) {
+                  val action = h1.getString(dimension).split(",")
+                  if (action.length >= 3) {
+                    featureMap.put(s"user_adverid_view_${dimension}", action(0))
+                    featureMap.put(s"user_adverid_click_${dimension}", action(1))
+                    featureMap.put(s"user_adverid_conver_${dimension}", action(2))
+                  }
+                }
+                if (h2.containsKey(dimension) && h2.getString(dimension).nonEmpty) {
+                  val action = h2.getString(dimension).split(",")
+                  if (action.length >= 3) {
+                    featureMap.put(s"user_skuid_view_${dimension}", action(0))
+                    featureMap.put(s"user_skuid_click_${dimension}", action(1))
+                    featureMap.put(s"user_skuid_conver_${dimension}", action(2))
+                  }
+                }
+              }
+
+
               val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else
                 JSON.parseObject(record.getString("d1_feature"))
               val d2: JSONObject = if (record.isNull("d2_feature")) new JSONObject() else