zhangbo 1 年之前
父节点
当前提交
b8ec66184c

+ 81 - 0
src/main/java/examples/dataloader/OfflineVlogFeatureGroupV1.java

@@ -0,0 +1,81 @@
+package examples.dataloader;
+
+/**
+ * Feature groups used by the offline vlog feature extractor (version 1).
+ *
+ * <p><b>Declaration order is significant:</b> {@link #getId()} returns
+ * {@link #ordinal()}, so inserting, removing or reordering constants changes
+ * the id of every constant declared after the edit. Append new groups at the
+ * end only.
+ */
+public enum OfflineVlogFeatureGroupV1 {
+
+    machineinfo_brand,
+    machineinfo_model,
+    machineinfo_platform,
+    machineinfo_system,
+    u_1day_exp_cnt,
+    u_1day_click_cnt,
+    u_1day_share_cnt,
+    u_1day_return_cnt,
+    u_1day_ctr,
+    u_1day_str,
+    u_1day_rov,
+    u_1day_ros,
+
+    u_3day_exp_cnt,
+    u_3day_click_cnt,
+    u_3day_share_cnt,
+    u_3day_return_cnt,
+    u_3day_ctr,
+    u_3day_str,
+    u_3day_rov,
+    u_3day_ros,
+
+
+    total_time,
+
+    play_count_total,
+    i_1day_exp_cnt,
+    i_1day_click_cnt,
+    i_1day_share_cnt,
+    i_1day_return_cnt,
+    i_1day_ctr,
+    i_1day_str,
+    i_1day_rov,
+    i_1day_ros,
+
+    i_3day_exp_cnt,
+    i_3day_click_cnt,
+    i_3day_share_cnt,
+    i_3day_return_cnt,
+    i_3day_ctr,
+    i_3day_str,
+    i_3day_rov,
+    i_3day_ros,
+
+    ctx_week,
+    ctx_hour,
+    ctx_region,
+    ctx_city,
+
+    ;
+
+
+    /** Decimal string form of the ordinal, encoded once as UTF-8. */
+    private final byte[] idBytes;
+    /** Lower-cased constant name, computed once. */
+    private final String groupName;
+    /** UTF-8 encoding of {@link #groupName}, computed once. */
+    private final byte[] nameBytes;
+
+    OfflineVlogFeatureGroupV1() {
+        // Explicit charset and locale so the encoded bytes do not depend on
+        // the JVM's platform defaults.
+        this.idBytes = String.valueOf(ordinal()).getBytes(java.nio.charset.StandardCharsets.UTF_8);
+        this.groupName = name().toLowerCase(java.util.Locale.ROOT);
+        this.nameBytes = this.groupName.getBytes(java.nio.charset.StandardCharsets.UTF_8);
+    }
+
+    /**
+     * @return the numeric id of this group, which is its {@link #ordinal()}
+     */
+    public final int getId() {
+        return ordinal();
+    }
+
+    /**
+     * @return the lower-cased constant name
+     */
+    public final String getGroupName() {
+        return groupName;
+    }
+
+    /**
+     * @return a fresh copy of the UTF-8 bytes of {@link #getGroupName()};
+     *         a copy is returned (as before) so callers may keep or modify it
+     */
+    public final byte[] getGroupNameBytes() {
+        return nameBytes.clone();
+    }
+
+    /**
+     * @return the UTF-8 bytes of the decimal id; this is the enum's internal
+     *         array (not a copy), so callers must not modify it
+     */
+    public final byte[] getIdBytes() {
+        return idBytes;
+    }
+
+}

+ 66 - 0
src/main/java/examples/dataloader/OfflineVlogShareLRFeatureExtractorV1.java

@@ -0,0 +1,66 @@
+package examples.dataloader;
+
+import com.google.common.collect.ArrayListMultimap;
+import com.google.common.collect.ListMultimap;
+import com.tzld.piaoquan.recommend.feature.domain.video.feature.BytesGroup;
+import com.tzld.piaoquan.recommend.feature.domain.video.feature.BytesUtils;
+import com.tzld.piaoquan.recommend.feature.model.sample.BaseFeature;
+import com.tzld.piaoquan.recommend.feature.model.sample.FeatureGroup;
+
+import java.util.Map;
+
+/**
+ * Turns a map of raw feature values into {@link BaseFeature} entries keyed by
+ * {@link FeatureGroup}, using the groups declared in
+ * {@link OfflineVlogFeatureGroupV1}.
+ *
+ * <p>Results accumulate in {@link #featureMap}; each call to
+ * {@link #makeFeature(Map)} or {@link #makeFeature4String(Map)} appends to it.
+ */
+public class OfflineVlogShareLRFeatureExtractorV1 {
+
+    /** Charset used for every String-to-bytes conversion, independent of platform defaults. */
+    private static final java.nio.charset.Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;
+
+    /** Accumulated features; read directly by callers after extraction. */
+    public ListMultimap<FeatureGroup, BaseFeature> featureMap = ArrayListMultimap.create();
+
+    private final BytesUtils utils;
+
+    /**
+     * Registers one {@link BytesGroup} per enum constant, stored at the index
+     * given by the constant's ordinal.
+     */
+    public OfflineVlogShareLRFeatureExtractorV1() {
+        OfflineVlogFeatureGroupV1[] allGroups = OfflineVlogFeatureGroupV1.values();
+        BytesGroup[] groups = new BytesGroup[allGroups.length];
+        for (OfflineVlogFeatureGroupV1 g : allGroups) {
+            groups[g.ordinal()] = new BytesGroup(g.ordinal(), g.getGroupName(), g.getGroupNameBytes());
+        }
+        this.utils = new BytesUtils(groups);
+    }
+
+    /**
+     * Adds one feature per map entry.
+     *
+     * <p>{@code null} values are skipped. {@code Integer} values are also
+     * skipped, preserving the existing (unfinished) behaviour. Every other
+     * value is converted with {@link String#valueOf(Object)}; previously any
+     * type other than {@code String}/{@code Double} hit an impossible cast to
+     * {@code String} and threw {@code ClassCastException}.
+     *
+     * @param maps feature name to raw value; each key must be the exact name
+     *             of an {@link OfflineVlogFeatureGroupV1} constant
+     * @throws IllegalArgumentException if a key is not a constant name
+     */
+    public void makeFeature(Map<String, Object> maps) {
+        for (Map.Entry<String, Object> entry : maps.entrySet()) {
+            OfflineVlogFeatureGroupV1 ovf = OfflineVlogFeatureGroupV1.valueOf(entry.getKey());
+            Object value = entry.getValue();
+            if (value == null) {
+                // Nothing to encode for an absent value.
+                continue;
+            }
+            if (value instanceof Integer) {
+                // TODO: Integer features are not emitted yet (unchanged from the original).
+                continue;
+            }
+            // String.valueOf returns a String unchanged and formats a Double
+            // exactly as the original String.valueOf((Double) value) did.
+            this.makeFea(ovf, String.valueOf(value).getBytes(UTF8));
+        }
+    }
+
+    /**
+     * Adds one feature per map entry whose value is non-null.
+     *
+     * @param maps feature name to string value; each key must be the exact
+     *             name of an {@link OfflineVlogFeatureGroupV1} constant
+     * @throws IllegalArgumentException if a key is not a constant name
+     */
+    public void makeFeature4String(Map<String, String> maps) {
+        for (Map.Entry<String, String> entry : maps.entrySet()) {
+            OfflineVlogFeatureGroupV1 ovf = OfflineVlogFeatureGroupV1.valueOf(entry.getKey());
+            String value = entry.getValue();
+            if (value == null) {
+                // A key may be present with a null value; skip it instead of throwing.
+                continue;
+            }
+            this.makeFea(ovf, value.getBytes(UTF8));
+        }
+    }
+
+    /** Builds the {@link FeatureGroup} message (type "1", name, ordinal as id) for a group. */
+    private FeatureGroup makeGroup(OfflineVlogFeatureGroupV1 group) {
+        return FeatureGroup.newBuilder()
+                .setType("1")
+                .setName(group.getGroupName())
+                .setId(group.ordinal())
+                .build();
+    }
+
+    /**
+     * Creates a feature for {@code group} from the encoded value via
+     * {@code BytesUtils.makeFea} and appends it to {@link #featureMap}.
+     */
+    void makeFea(OfflineVlogFeatureGroupV1 group, byte[] value) {
+        FeatureGroup featureGroup = this.makeGroup(group);
+        BaseFeature feature = this.utils.makeFea(group.ordinal(), value);
+        this.featureMap.put(featureGroup, feature);
+    }
+
+}

+ 55 - 35
src/main/scala/com/aliyun/odps/spark/examples/makedata/makedata_07_strData.scala

@@ -46,49 +46,69 @@ object makedata_07_strData {
         val labelJson = JSON.parseObject(labelStr)
         val label = if (labelJson.containsKey("is_share")) labelJson.getString("is_share") else "0"
         val feaJson = JSON.parseObject(feaStr)
-        val feaSet = Set(
-          "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
-          "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
-          "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
-          "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
-          "total_time", "play_count_total",
-          "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
-          "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
-          "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
-          "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
-          "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
-          "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros",
-
-          "share_uv_list_1day_6_avg", "share_uv_list_1day_6_var", "share_uv_list_1day_diff_6_avg", "share_uv_list_1day_diff_6_var",
-          "return_uv_list_1day_6_avg", "return_uv_list_1day_6_var", "return_uv_list_1day_diff_6_avg", "return_uv_list_1day_diff_6_var",
-          "share_uv_list_1h_6_avg", "share_uv_list_1h_6_var", "share_uv_list_1h_diff_6_avg", "share_uv_list_1h_diff_6_var",
-          "return_uv_list_1h_6_avg", "return_uv_list_1h_6_var", "return_uv_list_1h_diff_6_avg", "return_uv_list_1h_diff_6_var",
-
-          "view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
-          "share_pv_list_1day", "share_uv_list_1day", "return_uv_list_1day",
-          "p_view_uv_list_1day", "p_view_pv_list_1day", "p_return_uv_list_1day",
-          "share_uv_list_2day", "share_pv_list_2day", "share_uv_list_3day", "share_pv_list_3day",
-
-          "view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
-          "share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h",
-
-          "i_1day_ctr_rt", "i_1day_str_rt", "i_1day_ros_rt", "i_1day_rov_rt",
-          "i_1h_ctr_rt", "i_1h_str_rt", "i_1h_ros_rt", "i_1h_rov_rt"
-        )
-        val feaMap = new util.HashMap[String, String]()
-        feaSet.foreach(r=> {
-          if (feaJson.containsKey(r)){
-            feaMap.put(r, feaJson.getString(r))
-          }
-        })
+
 
         if ("v1".equals(featureVersion)){
+          val feaSet = Set(
+            "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
+            "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+            "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+            "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+            "total_time", "play_count_total",
+            "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+            "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+            "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
+            "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
+            "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
+            "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros"
+          )
+          val feaMap = new util.HashMap[String, String]()
+          feaSet.foreach(r => {
+            if (feaJson.containsKey(r)) {
+              feaMap.put(r, feaJson.getString(r))
+            }
+          })
           val bytesFeatureExtractor = new OfflineVlogShareLRFeatureExtractor()
           bytesFeatureExtractor.makeFeature4String(feaMap)
           val featureMap = bytesFeatureExtractor.featureMap
           label + "\t" + featureMap.entries().map(r => r.getValue.getIdentifier + ":1").mkString("\t")
 
         }else if ("v2".equals(featureVersion)){
+          val feaSet = Set(
+            "ctx_week", "ctx_hour", "ctx_region", "ctx_city",
+            "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+            "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+            "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt",
+            "total_time", "play_count_total",
+            "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+            "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
+            "u_1day_ctr", "u_1day_str", "u_1day_rov", "u_1day_ros",
+            "u_3day_ctr", "u_3day_str", "u_3day_rov", "u_3day_ros",
+            "i_1day_ctr", "i_1day_str", "i_1day_rov", "i_1day_ros",
+            "i_3day_ctr", "i_3day_str", "i_3day_rov", "i_3day_ros",
+
+            "share_uv_list_1day_6_avg", "share_uv_list_1day_6_var", "share_uv_list_1day_diff_6_avg", "share_uv_list_1day_diff_6_var",
+            "return_uv_list_1day_6_avg", "return_uv_list_1day_6_var", "return_uv_list_1day_diff_6_avg", "return_uv_list_1day_diff_6_var",
+            "share_uv_list_1h_6_avg", "share_uv_list_1h_6_var", "share_uv_list_1h_diff_6_avg", "share_uv_list_1h_diff_6_var",
+            "return_uv_list_1h_6_avg", "return_uv_list_1h_6_var", "return_uv_list_1h_diff_6_avg", "return_uv_list_1h_diff_6_var",
+
+            "view_pv_list_1day", "view_uv_list_1day", "play_pv_list_1day", "play_uv_list_1day",
+            "share_pv_list_1day", "share_uv_list_1day", "return_uv_list_1day",
+            "p_view_uv_list_1day", "p_view_pv_list_1day", "p_return_uv_list_1day",
+            "share_uv_list_2day", "share_pv_list_2day", "share_uv_list_3day", "share_pv_list_3day",
+
+            "view_uv_list_1h", "view_pv_list_1h", "play_uv_list_1h", "play_pv_list_1h",
+            "share_uv_list_1h", "share_pv_list_1h", "return_uv_list_1h", "p_return_uv_list_1h",
+
+            "i_1day_ctr_rt", "i_1day_str_rt", "i_1day_ros_rt", "i_1day_rov_rt",
+            "i_1h_ctr_rt", "i_1h_str_rt", "i_1h_ros_rt", "i_1h_rov_rt"
+          )
+          val feaMap = new util.HashMap[String, String]()
+          feaSet.foreach(r => {
+            if (feaJson.containsKey(r)) {
+              feaMap.put(r, feaJson.getString(r))
+            }
+          })
           val bytesFeatureExtractor = new OfflineVlogShareLRFeatureExtractorV2()
           bytesFeatureExtractor.makeFeature4String(feaMap)
           val featureMap = bytesFeatureExtractor.featureMap

+ 5 - 1
zhangbo/01_train.sh

@@ -8,4 +8,8 @@ $HADOOP fs -text ${train_path}/dt=$day/* | /root/sunmingze/alphaFM/bin/fm_train
 # nohup sh 01_train.sh 20231214 /dw/recommend/model/share_ratio_samples_v2/ model_sharev2 >p1.log 2>&1 &
 # nohup sh 01_train.sh 20231220 /dw/recommend/model/ros_sample_v2/ model_ros_v2 >p_model_ros_v2.log 2>&1 &
 # str 模型路径:/dw/recommend/model/share_ratio_samples_v2
-# ros 模型路径:/dw/recommend/model/ros_sample/
+# ros 模型路径:/dw/recommend/model/ros_sample/
+
+
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/01_str_data model_str_big >p1.log 2>&1 &
+# nohup sh 01_train.sh 20240114 /dw/recommend/model/02_str_data model_str_small >p2.log 2>&1 &