zhangbo 1 year ago
parent
commit
c26fdce7f9

+ 32 - 0
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_07_strData.scala

@@ -140,6 +140,38 @@ object makedata_07_strData {
           bytesFeatureExtractor.makeFeature4String(feaMap)
           val featureMap = bytesFeatureExtractor.featureMap
           label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
+        } else if ("v5".equals(featureVersion)) {
+          val feaSet = Set(
+            "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
+            "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+            "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+            "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+            "total_time", "play_count_total",
+            "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+            "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+            "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
+            "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
+            "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
+            "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros",
+
+            "view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
+            "share_pv_list_1day", "share_uv_list_1day", "return_uv_list_1day",
+            "p_view_uv_list_1day", "p_view_pv_list_1day", "p_return_uv_list_1day",
+            "share_uv_list_2day", "share_pv_list_2day", "share_uv_list_3day", "share_pv_list_3day",
+
+            "view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
+            "share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h",
+          )
+          val feaMap = new util.HashMap[String, String]()
+          feaSet.foreach(r => {
+            if (feaJson.containsKey(r)) {
+              feaMap.put(r, feaJson.getString(r))
+            }
+          })
+          val bytesFeatureExtractor = new OfflineVlogShareLRFeatureExtractorV2()
+          bytesFeatureExtractor.makeFeature4String(feaMap)
+          val featureMap = bytesFeatureExtractor.featureMap
+          label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
         }
 
       })

+ 2 - 1
zhangbo/01_train.sh

@@ -17,4 +17,5 @@ $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_train
 
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/01_str_data model_str_big >p1.log 2>&1 &
 # nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small >p2.log 2>&1 &
-# nohup sh 01_train.sh 20240114 /dw/recommend/model/03_str_data model_str_mid >p3.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/03_str_data model_str_mid >p3.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/04_str_data model_str_mid2 >p4_train.log 2>&1 &