Преглед на файлове

Merge branch 'dev-xym-add-features' of algorithm/recommend-emr-dataprocess into feature/20250104-zt-update

xueyiming преди 1 ден
родител
ревизия
e39a966b47

+ 31 - 1
src/main/scala/com/aliyun/odps/spark/examples/makedata_ad/v20240718/makedata_ad_33_bucketDataFromOriginToHive_20250228.scala

@@ -76,7 +76,9 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
       "user_cid_click_list", "user_cid_conver_list",
       "user_vid_return_tags_2h", "user_vid_return_tags_1d", "user_vid_return_tags_3d", "user_vid_return_tags_7d",
       "user_vid_return_tags_14d", "apptype", "ts", "mid", "pqtid", "hour", "hour_quarter", "root_source_scene",
-      "root_source_channel", "is_first_layer", "title_split", "profession", "user_vid_share_tags_1d", "user_vid_share_tags_14d")
+      "root_source_channel", "is_first_layer", "title_split", "profession", "user_vid_share_tags_1d", "user_vid_share_tags_14d",
+      "user_vid_return_cate1_14d", "user_vid_return_cate2_14d", "user_vid_share_cate1_14d", "user_vid_share_cate2_14d",
+      "creative_type", "creative_hook_embedding", "creative_why_embedding", "creative_action_embedding")
 
 
     // 2 读取odps+表信息
@@ -185,6 +187,18 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
               if (b1.containsKey("profession") && b1.getString("profession").nonEmpty) {
                 featureMap.put("profession", b1.getString("profession"))
               }
+              if (b1.containsKey("creative_type") && b1.getString("creative_type").nonEmpty) {
+                featureMap.put("creative_type", b1.getString("creative_type"))
+              }
+              if (b1.containsKey("creative_hook_embedding") && b1.getString("creative_hook_embedding").nonEmpty) {
+                featureMap.put("creative_hook_embedding", b1.getString("creative_hook_embedding").split('|').map(_.toDouble).map(_.toFloat).mkString("|"))
+              }
+              if (b1.containsKey("creative_why_embedding") && b1.getString("creative_why_embedding").nonEmpty) {
+                featureMap.put("creative_why_embedding", b1.getString("creative_why_embedding").split('|').map(_.toDouble).map(_.toFloat).mkString("|"))
+              }
+              if (b1.containsKey("creative_action_embedding") && b1.getString("creative_action_embedding").nonEmpty) {
+                featureMap.put("creative_action_embedding", b1.getString("creative_action_embedding").split('|').map(_.toDouble).map(_.toFloat).mkString("|"))
+              }
 
               val hour = DateTimeUtil.getHourByTimestamp(ts)
               featureMap.put("hour_" + hour, idDefaultValue)
@@ -435,6 +449,22 @@ object makedata_ad_33_bucketDataFromOriginToHive_20250228 {
                 featureMap.put("user_vid_share_tags_14d", e2.getString("tags_14d"))
               }
 
+              val g1: JSONObject = if (record.isNull("g1_feature")) new JSONObject() else
+                JSON.parseObject(record.getString("g1_feature"))
+              val g2: JSONObject = if (record.isNull("g2_feature")) new JSONObject() else
+                JSON.parseObject(record.getString("g2_feature"))
+              if (g1.containsKey("user_vid_play_cate1_14d") && g1.getString("user_vid_play_cate1_14d").nonEmpty) {
+                featureMap.put("user_vid_return_cate1_14d", g1.getString("user_vid_play_cate1_14d"))
+              }
+              if (g1.containsKey("user_vid_play_cate2_14d") && g1.getString("user_vid_play_cate2_14d").nonEmpty) {
+                featureMap.put("user_vid_return_cate2_14d", g1.getString("user_vid_play_cate2_14d"))
+              }
+              if (g2.containsKey("user_vid_share_cate1_14d") && g2.getString("user_vid_share_cate1_14d").nonEmpty) {
+                featureMap.put("user_vid_share_cate1_14d", g2.getString("user_vid_share_cate1_14d"))
+              }
+              if (g2.containsKey("user_vid_share_cate2_14d") && g2.getString("user_vid_share_cate2_14d").nonEmpty) {
+                featureMap.put("user_vid_share_cate2_14d", g2.getString("user_vid_share_cate2_14d"))
+              }
 
               val d1: JSONObject = if (record.isNull("d1_feature")) new JSONObject() else
                 JSON.parseObject(record.getString("d1_feature"))