Parcourir la source

video feature extractor

sunmingze il y a 1 an
Parent
commit
6f3aaa8da3
22 fichiers modifiés avec 141 ajouts et 1639 suppressions
  1. 7 0
      pom.xml
  2. 0 14
      src/main/java/com/tzld/piaoquan/data/base/Constant.java
  3. 0 138
      src/main/java/com/tzld/piaoquan/data/base/ItemFeature.java
  4. 0 122
      src/main/java/com/tzld/piaoquan/data/base/RequestContext.java
  5. 0 44
      src/main/java/com/tzld/piaoquan/data/base/RequestContextBytesFeature.java
  6. 0 108
      src/main/java/com/tzld/piaoquan/data/base/UserActionFeature.java
  7. 0 88
      src/main/java/com/tzld/piaoquan/data/base/UserBytesFeature.java
  8. 0 85
      src/main/java/com/tzld/piaoquan/data/base/UserFeature.java
  9. 0 93
      src/main/java/com/tzld/piaoquan/data/base/VideoBytesFeature.java
  10. 0 131
      src/main/java/com/tzld/piaoquan/data/base/VlogFeatureGroup.java
  11. 0 35
      src/main/java/com/tzld/piaoquan/data/score/feature/BytesGroup.java
  12. 0 192
      src/main/java/com/tzld/piaoquan/data/score/feature/BytesUtils.java
  13. 0 230
      src/main/java/com/tzld/piaoquan/data/score/feature/FeatureHash.java
  14. 0 67
      src/main/java/com/tzld/piaoquan/data/score/feature/LRBytesFeatureExtractorBase.java
  15. 0 152
      src/main/java/com/tzld/piaoquan/data/score/feature/VlogShareLRFeatureExtractor.java
  16. 1 22
      src/main/java/examples/dataloader/AdRedisFeatureConstructor.java
  17. 1 34
      src/main/java/examples/dataloader/RecommRedisFeatureConstructor.java
  18. 3 5
      src/main/java/examples/dataloader/RecommendSampleConstructor.java
  19. 0 1
      src/main/java/examples/sparksql/SparkAdCTRSampleLoader.java
  20. 5 4
      src/main/java/examples/sparksql/SparkShareRatioSampleLoader.java
  21. 0 74
      src/main/java/examples/sparksql/SparkUserFeaToRedisLoader.java
  22. 124 0
      src/main/java/examples/sparksql/SparkVideoFeaToRedisLoader.java

+ 7 - 0
pom.xml

@@ -45,6 +45,13 @@
             <version>1.0.1</version>
         </dependency>
 
+        <dependency>
+            <groupId>com.tzld.piaoquan</groupId>
+            <artifactId>recommend-feature-client</artifactId>
+            <version>1.0.0</version>
+        </dependency>
+
+
         <dependency>
             <groupId>com.tzld.piaoquan</groupId>
             <artifactId>ad-engine-commons</artifactId>

+ 0 - 14
src/main/java/com/tzld/piaoquan/data/base/Constant.java

@@ -1,14 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-/**
- * 常量
- *
- * @author supeng
- * @date 2020/08/19
- */
-public class Constant {
-    /**
-     * traceID
-     */
-    public static final String LOG_TRACE_ID = "logTraceId";
-}

+ 0 - 138
src/main/java/com/tzld/piaoquan/data/base/ItemFeature.java

@@ -1,138 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-
-@Getter
-@NoArgsConstructor
-public class ItemFeature {
-    private String videoId;
-
-    private String upId;
-
-    private String tags;
-
-    /**
-     * 有多个标题,暂时不会用到所以先不处理
-     * @since 2023-12-05
-     */
-    private String title;
-
-    private String titleLength;
-
-    private String playLength;
-
-    private String totalTime;
-
-    private String daysSinceUpload;
-
-
-    // 当天统计量信息
-    private UserActionFeature day1_cnt_features;
-    // 3天内统计量
-    private UserActionFeature day3_cnt_features;
-    // 7天内统计量
-    private UserActionFeature day7_cnt_features;
-    // 3个月统计量
-    private UserActionFeature month3_cnt_features;
-
-
-    public void setVideoId(String key){
-        if(key == null){
-            this.videoId = "0";
-        } else {
-            this.videoId = key;
-        }
-    }
-
-    public void setUpId(String key){
-        if(key == null){
-            this.upId = "0";
-        } else {
-            this.upId = key;
-        }
-    }
-
-    public void setTags(String key){
-        if(key == null){
-            this.tags = "0";
-        } else {
-            this.tags = key;
-        }
-    }
-
-    public void setTitle(String key){
-        if(key == null){
-            this.title = "0";
-        } else {
-            this.title = key;
-        }
-    }
-
-
-    public void setDay1_cnt_features(UserActionFeature feature){
-        this.day1_cnt_features = feature;
-    }
-
-
-    public void setDay3_cnt_features(UserActionFeature feature){
-        this.day3_cnt_features = feature;
-
-    }
-
-    public void setDay7_cnt_features(UserActionFeature feature){
-        this.day7_cnt_features = feature;
-
-    }
-
-    public void setMonth3_cnt_features(UserActionFeature feature){
-        this.month3_cnt_features= feature;
-
-    }
-
-    public void setTitleLength(String key) {
-        if(key == null){
-            this.titleLength = "0";
-        } else {
-            this.titleLength = key;
-        }
-    }
-
-
-    public void setDaysSinceUpload(String key) {
-        if(key == null){
-            this.daysSinceUpload = "0";
-        } else {
-            this.daysSinceUpload = key;
-        }
-    }
-
-    public void setPlayLength(String key) {
-        if(key == null){
-            this.playLength = "0";
-        } else {
-            this.playLength = key;
-        }
-    }
-
-    public void setTotalTime(String key) {
-        if(key == null){
-            this.totalTime = "0";
-        } else {
-            this.totalTime = key;
-        }
-    }
-
-    public String getKey() {
-        return this.videoId;
-    }
-
-    public String getValue(){
-        Gson gson = new GsonBuilder().serializeSpecialFloatingPointValues().create();
-        return gson.toJson(this);
-    }
-
-
-}

+ 0 - 122
src/main/java/com/tzld/piaoquan/data/base/RequestContext.java

@@ -1,122 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-
-@Getter
-@NoArgsConstructor
-public class RequestContext {
-
-    private String request_id;
-    // 机型等信息
-    private String apptype;
-    private String machineinfo_brand;
-    private String machineinfo_model;
-    private String machineinfo_platform;
-    private String machineinfo_sdkversion;
-    private String machineinfo_system;
-    private String machineinfo_wechatversion;
-
-    // 时间等信息
-    private String day;
-    private String week;
-    private String hour;
-    private String region;
-    private String city;
-
-    public void setApptype(String apptype) {
-        this.apptype = apptype;
-        if(apptype == null)
-            this.apptype = "-1";
-    }
-
-    public void setMachineinfo_brand(String machineinfo_brand) {
-        this.machineinfo_brand = machineinfo_brand;
-        if(machineinfo_brand == null)
-            this.machineinfo_brand = "-1";
-    }
-
-    public void setMachineinfo_model(String machineinfo_model) {
-        this.machineinfo_model = machineinfo_model;
-        if(machineinfo_model == null)
-            this.machineinfo_model = "-1";
-    }
-
-
-    public void setMachineinfo_wechatversion(String machineinfo_wechatversion) {
-        this.machineinfo_wechatversion = machineinfo_wechatversion;
-        if(machineinfo_wechatversion == null)
-            this.machineinfo_wechatversion = "-1";
-    }
-
-
-    public void setMachineinfo_sdkversion(String machineinfo_sdkversion) {
-        this.machineinfo_sdkversion = machineinfo_sdkversion;
-        if(machineinfo_sdkversion == null)
-            this.machineinfo_sdkversion = "-1";
-    }
-
-    public void setMachineinfo_platform(String machineinfo_platform) {
-        this.machineinfo_platform = machineinfo_platform;
-        if(machineinfo_platform == null)
-            this.machineinfo_platform = "-1";
-    }
-
-    public void setMachineinfo_system(String machineinfo_system) {
-        this.machineinfo_system = machineinfo_system;
-        if(machineinfo_system == null)
-            this.machineinfo_system = "-1";
-    }
-
-
-
-    public void setHour(String hour) {
-        this.hour = hour;
-        if(hour == null)
-            this.hour = "-1";
-    }
-
-
-    public void setDay(String day) {
-        this.day = day;
-        if(day == null)
-            this.day = "-1";
-    }
-
-    public void setWeek(String week) {
-        this.week = week;
-        if(week == null)
-            this.week = "-1";
-    }
-
-
-    public void setRegion(String region) {
-        this.region = region;
-        if(region == null)
-            this.region = "-1";
-    }
-
-
-    public void setCity(String city) {
-        this.city = city;
-        if(city == null)
-            this.city = "-1";
-    }
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-}

+ 0 - 44
src/main/java/com/tzld/piaoquan/data/base/RequestContextBytesFeature.java

@@ -1,44 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-
-import lombok.Data;
-
-@Data
-public class RequestContextBytesFeature {
-    private final byte[] apptype;
-    private final byte[]  machineinfo_brand;
-    private final byte[]  machineinfo_model;
-    private final byte[]  machineinfo_platform;
-    private final byte[]  machineinfo_sdkversion;
-    private final byte[]  machineinfo_system;
-    private final byte[]  machineinfo_wechatversion;
-
-    // 时间等信息
-    private final byte[]  day;
-    private final byte[]  week;
-    private final byte[]  hour;
-    private final byte[]  region;
-    private final byte[]  city;
-
-
-    public RequestContextBytesFeature(RequestContext requestContext){
-        apptype = requestContext.getApptype().getBytes();
-        machineinfo_brand = requestContext.getMachineinfo_brand().getBytes();
-        machineinfo_model = requestContext.getMachineinfo_model().getBytes();
-        machineinfo_platform = requestContext.getMachineinfo_platform().getBytes();
-        machineinfo_sdkversion = requestContext.getMachineinfo_sdkversion().getBytes();
-        machineinfo_system = requestContext.getMachineinfo_system().getBytes();
-        machineinfo_wechatversion = requestContext.getMachineinfo_wechatversion().getBytes();
-
-        day = requestContext.getDay().getBytes();
-        week = requestContext.getWeek().getBytes();
-        hour = requestContext.getHour().getBytes();
-        region = requestContext.getRegion().getBytes();
-        city = requestContext.getCity().getBytes();
-    }
-
-
-
-
-
-}

+ 0 - 108
src/main/java/com/tzld/piaoquan/data/base/UserActionFeature.java

@@ -1,108 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-import lombok.Data;
-
-@Data
-public class UserActionFeature {
-    private double exp_cnt;
-    private double click_cnt;
-    private double share_cnt;
-    private double return_cnt;
-
-    private double ctr;
-    private double str;
-    private double rov;
-    private double ros;
-
-    private double ceilLog(Double key) {
-        return Math.ceil(Math.log(key));
-    }
-
-    private double bucketRatioFeature(Double key) {
-        long bucket = Math.round(Math.log(key * 100));
-        if( bucket > 100)
-            bucket = 100;
-        return (double) bucket;
-    }
-
-
-    public void setExp_cnt(Object key){
-        if(key == null ) {
-            this.exp_cnt = 0.0;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.exp_cnt = ceilLog(Double.valueOf(formateKey));
-        }
-    }
-
-    public void setClick_cnt(Object key){
-        if(key == null ){
-            this.click_cnt = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.click_cnt = ceilLog(Double.valueOf(formateKey));
-        }
-    }
-    public void setShare_cnt(Object key){
-        if(key == null ){
-            this.share_cnt = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.share_cnt = ceilLog(Double.valueOf(formateKey));
-        }
-    }
-    public void setReturn_cnt(Object key){
-        if(key == null ){
-            this.return_cnt = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.return_cnt = ceilLog(Double.valueOf(formateKey));
-        }
-    }
-
-    public void setCtr(Object key){
-        if(key == null ){
-            this.ctr = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.ctr = bucketRatioFeature(Double.valueOf(formateKey));
-        }
-    }
-
-    public void setStr(Object key){
-        if(key == null ){
-            this.str = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.str = bucketRatioFeature(Double.valueOf(formateKey));
-        }
-    }
-
-    public void setRov(Object key){
-        if(key == null ){
-            this.rov = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.rov = bucketRatioFeature(Double.valueOf(formateKey));
-        }
-    }
-
-    public void setRos(Object key){
-        if(key == null ){
-            this.ros = 0.0 ;
-        } else {
-            String formateKey = key.toString().replace("\\N", "-1");
-            this.ros = bucketRatioFeature(Double.valueOf(formateKey));
-        }
-    }
-
-
-
-
-
-
-
-
-
-
-}

+ 0 - 88
src/main/java/com/tzld/piaoquan/data/base/UserBytesFeature.java

@@ -1,88 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-import lombok.Data;
-
-import java.util.HashMap;
-import java.util.Map;
-import com.tzld.piaoquan.data.base.UserFeature;
-
-@Data
-public class UserBytesFeature {
-
-    private final byte[]  uid;
-
-    // 当天统计量信息
-    private Map<String, byte[]> day1_cnt_features;
-    // 3天内统计量
-    private Map<String, byte[]> day3_cnt_features;
-    // 7天内统计量
-    private Map<String, byte[]> day7_cnt_features;
-    // 3个月统计量
-    private Map<String, byte[]> month3_cnt_features;
-    // 用户行为周期
-    private final byte[]  user_cycle_bucket_7days;
-    private final byte[]  user_cycle_bucket_30days;
-    private final byte[]  user_share_bucket_30days;
-
-
-    public UserBytesFeature(UserFeature feature) {
-        this.uid = feature.getUid().getBytes();
-        this.user_cycle_bucket_7days = feature.getUser_cycle_bucket_7days().getBytes();
-        this.user_cycle_bucket_30days = feature.getUser_cycle_bucket_30days().getBytes();
-        this.user_share_bucket_30days = feature.getUser_share_bucket_30days().getBytes();
-
-        this.day1_cnt_features = new HashMap<String, byte[]>();
-        // 1 day statistic
-        this.day1_cnt_features.put("exp", String.valueOf(feature.getDay1_cnt_features().getExp_cnt()).getBytes());
-        this.day1_cnt_features.put("click", String.valueOf(feature.getDay1_cnt_features().getClick_cnt()).getBytes());
-        this.day1_cnt_features.put("share", String.valueOf(feature.getDay1_cnt_features().getShare_cnt()).getBytes());
-        this.day1_cnt_features.put("return", String.valueOf(feature.getDay1_cnt_features().getReturn_cnt()).getBytes());
-        this.day1_cnt_features.put("ctr", String.valueOf(feature.getDay1_cnt_features().getCtr()).getBytes());
-        this.day1_cnt_features.put("str", String.valueOf(feature.getDay1_cnt_features().getStr()).getBytes());
-        this.day1_cnt_features.put("rov", String.valueOf(feature.getDay1_cnt_features().getRov()).getBytes());
-        this.day1_cnt_features.put("ros", String.valueOf(feature.getDay1_cnt_features().getRos()).getBytes());
-
-
-
-        // 3 day statistic
-        this.day3_cnt_features = new HashMap<String, byte[]>();
-        day3_cnt_features.put("exp", String.valueOf(feature.getDay3_cnt_features().getExp_cnt()).getBytes());
-        day3_cnt_features.put("click", String.valueOf(feature.getDay3_cnt_features().getClick_cnt()).getBytes());
-        day3_cnt_features.put("share", String.valueOf(feature.getDay3_cnt_features().getShare_cnt()).getBytes());
-        day3_cnt_features.put("return", String.valueOf(feature.getDay3_cnt_features().getReturn_cnt()).getBytes());
-        day3_cnt_features.put("ctr", String.valueOf(feature.getDay3_cnt_features().getCtr()).getBytes());
-        day3_cnt_features.put("str", String.valueOf(feature.getDay3_cnt_features().getStr()).getBytes());
-        day3_cnt_features.put("rov", String.valueOf(feature.getDay3_cnt_features().getRov()).getBytes());
-        day3_cnt_features.put("ros", String.valueOf(feature.getDay3_cnt_features().getRos()).getBytes());
-
-
-        // 7 day statistic
-        this.day7_cnt_features = new HashMap<String, byte[]>();
-        day7_cnt_features.put("exp", String.valueOf(feature.getDay7_cnt_features().getExp_cnt()).getBytes());
-        day7_cnt_features.put("click", String.valueOf(feature.getDay7_cnt_features().getClick_cnt()).getBytes());
-        day7_cnt_features.put("share", String.valueOf(feature.getDay7_cnt_features().getShare_cnt()).getBytes());
-        day7_cnt_features.put("return", String.valueOf(feature.getDay7_cnt_features().getReturn_cnt()).getBytes());
-        day7_cnt_features.put("ctr", String.valueOf(feature.getDay7_cnt_features().getCtr()).getBytes());
-        day7_cnt_features.put("str", String.valueOf(feature.getDay7_cnt_features().getStr()).getBytes());
-        day7_cnt_features.put("rov", String.valueOf(feature.getDay7_cnt_features().getRov()).getBytes());
-        day7_cnt_features.put("ros", String.valueOf(feature.getDay7_cnt_features().getRos()).getBytes());
-
-
-
-        // 3 month statisic
-        this.month3_cnt_features = new HashMap<String, byte[]>();
-        month3_cnt_features.put("exp", String.valueOf(feature.getMonth3_cnt_features().getExp_cnt()).getBytes());
-        month3_cnt_features.put("click", String.valueOf(feature.getMonth3_cnt_features().getClick_cnt()).getBytes());
-        month3_cnt_features.put("share", String.valueOf(feature.getMonth3_cnt_features().getShare_cnt()).getBytes());
-        month3_cnt_features.put("return", String.valueOf(feature.getMonth3_cnt_features().getReturn_cnt()).getBytes());
-        month3_cnt_features.put("ctr", String.valueOf(feature.getMonth3_cnt_features().getCtr()).getBytes());
-        month3_cnt_features.put("str", String.valueOf(feature.getMonth3_cnt_features().getStr()).getBytes());
-        month3_cnt_features.put("rov", String.valueOf(feature.getMonth3_cnt_features().getRov()).getBytes());
-        month3_cnt_features.put("ros", String.valueOf(feature.getMonth3_cnt_features().getRos()).getBytes());
-
-
-    }
-
-
-
-}

+ 0 - 85
src/main/java/com/tzld/piaoquan/data/base/UserFeature.java

@@ -1,85 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-import com.google.gson.Gson;
-import com.google.gson.GsonBuilder;
-import lombok.Getter;
-import lombok.NoArgsConstructor;
-
-@Getter
-@NoArgsConstructor
-public class UserFeature {
-    private String uid;
-    // 当天统计量信息
-    private UserActionFeature day1_cnt_features;
-    // 3天内统计量
-    private UserActionFeature day3_cnt_features;
-    // 7天内统计量
-    private UserActionFeature day7_cnt_features;
-    // 3个月统计量
-    private UserActionFeature month3_cnt_features;
-    // 用户行为周期
-    private String user_cycle_bucket_7days;
-    private String user_cycle_bucket_30days;
-    private String user_share_bucket_30days;
-
-    public void setUid(String key){
-        this.uid = key;
-        if(key == null)
-            this.uid = "0";
-    }
-
-
-    public void setDay1_cnt_features(UserActionFeature key){
-        this.day1_cnt_features = key;
-        if(key == null)
-            this.day1_cnt_features = new UserActionFeature();
-    }
-
-    public void setDay3_cnt_features(UserActionFeature key){
-        this.day3_cnt_features = key;
-        if(key == null)
-            this.day3_cnt_features = new UserActionFeature();
-    }
-
-    public void setDay7_cnt_features(UserActionFeature key){
-        this.day7_cnt_features = key;
-        if(key == null)
-            this.day7_cnt_features = new UserActionFeature();
-    }
-
-    public void setMonth3_cnt_features(UserActionFeature key) {
-        this.month3_cnt_features = key;
-        if(key == null)
-            this.month3_cnt_features = new UserActionFeature();
-    }
-
-
-    public void setUser_cycle_bucket_7days(String key){
-        this.user_cycle_bucket_7days = key;
-        if(key == null)
-            this.user_cycle_bucket_7days = "0";
-    }
-
-    public void setUser_cycle_bucket_30days(String key){
-        this.user_cycle_bucket_30days = key;
-        if(key == null)
-            this.user_cycle_bucket_30days = "0";
-    }
-
-    public void setUser_share_bucket_30days(String key){
-        this.user_share_bucket_30days = key;
-        if(key == null)
-            this.user_share_bucket_30days = "0";
-    }
-
-
-    public String getKey() {
-        return this.uid;
-    }
-
-    public String getValue(){
-        Gson gson = new GsonBuilder().serializeSpecialFloatingPointValues().create();
-        return gson.toJson(this);
-    }
-
-}

+ 0 - 93
src/main/java/com/tzld/piaoquan/data/base/VideoBytesFeature.java

@@ -1,93 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-import lombok.Data;
-
-import java.util.HashMap;
-import java.util.Map;
-@Data
-public class VideoBytesFeature {
-    private final byte[] videoId;
-
-    private final byte[] upId;
-
-    private final byte[] titleLength;
-
-    private final byte[] playLength;
-
-    private final byte[] totolTime;
-
-    private final byte[] daysSinceUpload;
-
-
-    // 当天统计量信息
-    private Map<String, byte[]> item_day1_cnt_features;
-    // 3天内统计量
-    private Map<String, byte[]> item_day3_cnt_features;
-    // 7天内统计量
-    private Map<String, byte[]> item_day7_cnt_features;
-    // 3个月统计量
-    private Map<String, byte[]> item_month3_cnt_features;
-
-    public VideoBytesFeature(ItemFeature feature) {
-        videoId  = feature.getVideoId().getBytes();
-        upId  = feature.getUpId().getBytes();
-
-        titleLength  = feature.getTitleLength().getBytes();
-        playLength  = feature.getPlayLength().getBytes();
-        totolTime  = feature.getTotalTime().getBytes();
-        daysSinceUpload  = feature.getDaysSinceUpload().getBytes();
-        // 1day
-
-
-        // 1 day statistic
-        item_day1_cnt_features = new HashMap<String, byte[]>();
-        item_day1_cnt_features.put("exp", String.valueOf(feature.getDay1_cnt_features().getExp_cnt()).getBytes());
-        item_day1_cnt_features.put("click", String.valueOf(feature.getDay1_cnt_features().getClick_cnt()).getBytes());
-        item_day1_cnt_features.put("share", String.valueOf(feature.getDay1_cnt_features().getShare_cnt()).getBytes());
-        item_day1_cnt_features.put("return", String.valueOf(feature.getDay1_cnt_features().getReturn_cnt()).getBytes());
-        item_day1_cnt_features.put("ctr", String.valueOf(feature.getDay1_cnt_features().getCtr()).getBytes());
-        item_day1_cnt_features.put("str", String.valueOf(feature.getDay1_cnt_features().getStr()).getBytes());
-        item_day1_cnt_features.put("rov", String.valueOf(feature.getDay1_cnt_features().getRov()).getBytes());
-        item_day1_cnt_features.put("ros", String.valueOf(feature.getDay1_cnt_features().getRos()).getBytes());
-
-
-
-        // 3 day statistic
-        item_day3_cnt_features = new HashMap<String, byte[]>();
-        item_day3_cnt_features.put("exp", String.valueOf(feature.getDay3_cnt_features().getExp_cnt()).getBytes());
-        item_day3_cnt_features.put("click", String.valueOf(feature.getDay3_cnt_features().getClick_cnt()).getBytes());
-        item_day3_cnt_features.put("share", String.valueOf(feature.getDay3_cnt_features().getShare_cnt()).getBytes());
-        item_day3_cnt_features.put("return", String.valueOf(feature.getDay3_cnt_features().getReturn_cnt()).getBytes());
-        item_day3_cnt_features.put("ctr", String.valueOf(feature.getDay3_cnt_features().getCtr()).getBytes());
-        item_day3_cnt_features.put("str", String.valueOf(feature.getDay3_cnt_features().getStr()).getBytes());
-        item_day3_cnt_features.put("rov", String.valueOf(feature.getDay3_cnt_features().getRov()).getBytes());
-        item_day3_cnt_features.put("ros", String.valueOf(feature.getDay3_cnt_features().getRos()).getBytes());
-
-
-        // 7 day statistic
-        item_day7_cnt_features = new HashMap<String, byte[]>();
-        item_day7_cnt_features.put("exp", String.valueOf(feature.getDay7_cnt_features().getExp_cnt()).getBytes());
-        item_day7_cnt_features.put("click", String.valueOf(feature.getDay7_cnt_features().getClick_cnt()).getBytes());
-        item_day7_cnt_features.put("share", String.valueOf(feature.getDay7_cnt_features().getShare_cnt()).getBytes());
-        item_day7_cnt_features.put("return", String.valueOf(feature.getDay7_cnt_features().getReturn_cnt()).getBytes());
-        item_day7_cnt_features.put("ctr", String.valueOf(feature.getDay7_cnt_features().getCtr()).getBytes());
-        item_day7_cnt_features.put("str", String.valueOf(feature.getDay7_cnt_features().getStr()).getBytes());
-        item_day7_cnt_features.put("rov", String.valueOf(feature.getDay7_cnt_features().getRov()).getBytes());
-        item_day7_cnt_features.put("ros", String.valueOf(feature.getDay7_cnt_features().getRos()).getBytes());
-
-
-
-        // 3 month statisic
-        item_month3_cnt_features = new HashMap<String, byte[]>();
-        item_month3_cnt_features.put("exp", String.valueOf(feature.getMonth3_cnt_features().getExp_cnt()).getBytes());
-        item_month3_cnt_features.put("click", String.valueOf(feature.getMonth3_cnt_features().getClick_cnt()).getBytes());
-        item_month3_cnt_features.put("share", String.valueOf(feature.getMonth3_cnt_features().getShare_cnt()).getBytes());
-        item_month3_cnt_features.put("return", String.valueOf(feature.getMonth3_cnt_features().getReturn_cnt()).getBytes());
-        item_month3_cnt_features.put("ctr", String.valueOf(feature.getMonth3_cnt_features().getCtr()).getBytes());
-        item_month3_cnt_features.put("str", String.valueOf(feature.getMonth3_cnt_features().getStr()).getBytes());
-        item_month3_cnt_features.put("rov", String.valueOf(feature.getMonth3_cnt_features().getRov()).getBytes());
-        item_month3_cnt_features.put("ros", String.valueOf(feature.getMonth3_cnt_features().getRos()).getBytes());
-
-    }
-
-}

+ 0 - 131
src/main/java/com/tzld/piaoquan/data/base/VlogFeatureGroup.java

@@ -1,131 +0,0 @@
-package com.tzld.piaoquan.data.base;
-
-public enum VlogFeatureGroup {
-
-    // video
-    APPTYP,
-    VIDEOID,
-    MID,
-    UID,
-    MACHINEINFO_BRAND,
-    MACHINEINFO_MODEL,
-    MACHINEINFO_PLATFORM,
-    MACHINEINFO_SDKVERSION,
-    MACHINEINFO_SYSTEM,
-    MACHINEINFO_WECHATVERSION,
-    UP_ID,
-    TITLE_LEN,
-    PLAY_LEN,
-    TOTAL_TIME,
-    DAYS_SINCE_UPLOAD,
-    DAY,
-    WEEK,
-    HOUR,
-    REGION,
-    CITY,
-
-    USER_1DAY_EXP,
-    USER_1DAY_CLICK,
-    USER_1DAY_SHARE,
-    USER_1DAY_RETURN,
-    USER_1DAY_CTR,
-    USER_1DAY_STR,
-    USER_1DAY_ROV,
-    USER_1DAY_ROS,
-
-    USER_3DAY_EXP,
-    USER_3DAY_CLICK,
-    USER_3DAY_SHARE,
-    USER_3DAY_RETURN,
-    USER_3DAY_CTR,
-    USER_3DAY_STR,
-    USER_3DAY_ROV,
-    USER_3DAY_ROS,
-
-    USER_7DAY_EXP,
-    USER_7DAY_CLICK,
-    USER_7DAY_SHARE,
-    USER_7DAY_RETURN,
-    USER_7DAY_CTR,
-    USER_7DAY_STR,
-    USER_7DAY_ROV,
-    USER_7DAY_ROS,
-
-    USER_3MONTH_EXP,
-    USER_3MONTH_CLICK,
-    USER_3MONTH_SHARE,
-    USER_3MONTH_RETURN,
-    USER_3MONTH_CTR,
-    USER_3MONTH_STR,
-    USER_3MONTH_ROV,
-    USER_3MONTH_ROS,
-
-
-    ITEM_1DAY_EXP,
-    ITEM_1DAY_CLICK,
-    ITEM_1DAY_SHARE,
-    ITEM_1DAY_RETURN,
-    ITEM_1DAY_CTR,
-    ITEM_1DAY_STR,
-    ITEM_1DAY_ROV,
-    ITEM_1DAY_ROS,
-
-    ITEM_3DAY_EXP,
-    ITEM_3DAY_CLICK,
-    ITEM_3DAY_SHARE,
-    ITEM_3DAY_RETURN,
-    ITEM_3DAY_CTR,
-    ITEM_3DAY_STR,
-    ITEM_3DAY_ROV,
-    ITEM_3DAY_ROS,
-
-    ITEM_7DAY_EXP,
-    ITEM_7DAY_CLICK,
-    ITEM_7DAY_SHARE,
-    ITEM_7DAY_RETURN,
-    ITEM_7DAY_CTR,
-    ITEM_7DAY_STR,
-    ITEM_7DAY_ROV,
-    ITEM_7DAY_ROS,
-
-    ITEM_3MONTH_EXP,
-    ITEM_3MONTH_CLICK,
-    ITEM_3MONTH_SHARE,
-    ITEM_3MONTH_RETURN,
-    ITEM_3MONTH_CTR,
-    ITEM_3MONTH_STR,
-    ITEM_3MONTH_ROV,
-    ITEM_3MONTH_ROS,
-
-
-    USER_CYCLE_BUCKET_7DAY,
-    USER_CYCLE_BUCKET_30DAY,
-    USER_SHARE_BUCKET_30DAY,
-    ;
-
-
-    private final byte[] idBytes;
-    private final byte[] nameBytes;
-
-    VlogFeatureGroup() {
-        this.nameBytes = name().toLowerCase().getBytes();
-        this.idBytes = String.valueOf(ordinal()).getBytes();
-    }
-
-    public final int getId() {
-        return ordinal();
-    }
-
-    public final String getGroupName() {
-        return name().toLowerCase();
-    }
-
-    public final byte[] getGroupNameBytes() {
-        return getGroupName().getBytes();
-    }
-
-    public final byte[] getIdBytes() {
-        return idBytes;
-    }
-
-}

+ 0 - 35
src/main/java/com/tzld/piaoquan/data/score/feature/BytesGroup.java

@@ -1,35 +0,0 @@
-package com.tzld.piaoquan.data.score.feature;
-
-
-public class BytesGroup {
-    private int id;
-    private String name;
-    private byte[] nameBytes;
-    private byte[] buffer;
-
-    public BytesGroup(int id, String name, byte[] nameBytes) {
-        this.id = id;
-        this.name = name;
-        this.nameBytes = nameBytes;
-    }
-
-    public int getId() {
-        return id;
-    }
-
-    public String getName() {
-        return name;
-    }
-
-    public byte[] getNameBytes() {
-        return nameBytes;
-    }
-
-    public byte[] getBuffer() {
-        return buffer;
-    }
-
-    public void setBuffer(byte[] buffer) {
-        this.buffer = buffer;
-    }
-}

+ 0 - 192
src/main/java/com/tzld/piaoquan/data/score/feature/BytesUtils.java

@@ -1,192 +0,0 @@
-package com.tzld.piaoquan.data.score.feature;
-
-
-import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * Builds hashed features from a group id plus one or more byte-array values.
- * The bytes {@code name#p1_p2_...} are assembled in the group's buffer and hashed
- * with {@link FeatureHash#MurmurHash64(byte[], int, int, long)}; the hash becomes
- * the identifier of the returned {@code BaseFeature}.
- *
- * NOTE(review): each group's buffer is shared and overwritten by every makeFea
- * call, so concurrent use of one instance looks unsafe — confirm callers
- * serialize access.
- * NOTE(review): no makeFea overload checks the assembled length against
- * {@code MAX_FEATURE_BYTES_LENGTH}; an over-long value makes
- * {@code System.arraycopy} throw.
- */
-public class BytesUtils {
-    private static final byte[] SEPARATOR = "_".getBytes(); // placed between value parts
-    private static final byte[] FEATURE_SEPARATOR = "#".getBytes(); // placed after the group name
-    private static final int MAX_FEATURE_BYTES_LENGTH = 512; // size of every per-group buffer
-    private static final long SEED = 11L; // seed handed to FeatureHash.MurmurHash64
-    private BytesGroup[] groups;
-
-    /**
-     * A List variant that silently discards {@code null} when it is passed to
-     * {@link #add(Object)}. Only that single-element {@code add} is overridden.
-     * @param <E> element type of the list.
-     */
-    public static class NullRejectingArrayList<E> extends ArrayList<E> {
-        public NullRejectingArrayList(int capacity) {
-            super(capacity);
-        }
-
-        public NullRejectingArrayList() {
-            super();
-        }
-
-        @Override
-        public boolean add(E e) {
-            return e != null && super.add(e); // null: nothing is added and false is returned
-        }
-    }
-    /**
-     * Keeps the given array and gives every group a pre-filled buffer.
-     * The buffer is stored at {@code groups[g.getId()]}, so each group's id is
-     * used as an index into the same array.
-     */
-    public BytesUtils(BytesGroup[] groups) {
-        this.groups = groups;
-        for (BytesGroup g : groups) {
-            byte[] buffer = prepareBuffer(g.getName(), g.getNameBytes());
-            groups[g.getId()].setBuffer(buffer);
-        }
-    }
-    /**
-     * Allocates a {@code MAX_FEATURE_BYTES_LENGTH}-byte buffer that starts with
-     * {@code nameBytes} followed by the one-byte feature separator.
-     * The {@code name} argument is not read.
-     */
-    public byte[] prepareBuffer(String name, byte[] nameBytes) {
-
-        byte[] buffer = new byte[MAX_FEATURE_BYTES_LENGTH];
-        System.arraycopy(nameBytes, 0, buffer, 0, nameBytes.length);
-        System.arraycopy(FEATURE_SEPARATOR, 0, buffer, nameBytes.length, 1);
-        return buffer;
-    }
-    /**
-     * Hashes the first {@code length} bytes of {@code buffer} with {@code SEED}
-     * and returns a {@code BaseFeature} whose identifier is that hash.
-     */
-    public BaseFeature baseFea(byte[] buffer, int length) {
-        long hash = FeatureHash.MurmurHash64(buffer, 0, length, SEED);
-
-        // To inspect while debugging: String fea = new String(buffer, 0, length);
-        // Initialize the protobuf builder and set its value
-        BaseFeature.Builder tmp = BaseFeature.newBuilder();
-        tmp.setIdentifier(hash);
-        return tmp.build();
-    }
-    /**
-     * Builds the feature for {@code name#value} in group {@code id}.
-     * Returns {@code null} when the group has no buffer or {@code value} is null.
-     */
-    public BaseFeature makeFea(int id, byte[] value) {
-        byte[] buffer = groups[id].getBuffer();
-        if (buffer == null || value == null) {
-            return null;
-        }
-
-        final int nameLength = groups[id].getNameBytes().length + 1; // name plus the '#' byte
-        final int length = nameLength + value.length;
-        System.arraycopy(value, 0, buffer, nameLength, value.length);
-        return baseFea(buffer, length);
-    }
-    /**
-     * Builds the feature for {@code name#p1_p2} in group {@code id}.
-     * Returns {@code null} when the group has no buffer or any part is null.
-     */
-    public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2) {
-        byte[] buffer = groups[id].getBuffer();
-        if (buffer == null || p1 == null || p2 == null) {
-            return null;
-        }
-
-        final int nameLength = groups[id].getNameBytes().length + 1;
-        final int length = nameLength + p1.length + 1 + p2.length;
-
-        System.arraycopy(p1, 0, buffer, nameLength, p1.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
-        System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
-        return baseFea(buffer, length);
-    }
-    /**
-     * Builds the feature for {@code name#p1_p2_p3} in group {@code id}.
-     * Returns {@code null} when the group has no buffer or any part is null.
-     */
-    public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2, final byte[] p3) {
-        byte[] buffer = groups[id].getBuffer();
-        if (buffer == null || p1 == null || p2 == null || p3 == null) {
-            return null;
-        }
-
-        final int nameLength = groups[id].getNameBytes().length + 1;
-        final int length = nameLength + p1.length + 1 + p2.length + 1 + p3.length;
-        System.arraycopy(p1, 0, buffer, nameLength, p1.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
-        System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length, 1);
-        System.arraycopy(p3, 0, buffer, nameLength + p1.length + 1 + p2.length + 1, p3.length);
-
-        return baseFea(buffer, length);
-    }
-    /**
-     * Builds the feature for {@code name#p1_p2_p3_p4} in group {@code id}.
-     * Returns {@code null} when the group has no buffer or any part is null.
-     */
-    public BaseFeature makeFea(int id, final byte[] p1, final byte[] p2, final byte[] p3, final byte[] p4) {
-        byte[] buffer = groups[id].getBuffer();
-        if (buffer == null || p1 == null || p2 == null || p3 == null || p4 == null) {
-            return null;
-        }
-
-        final int nameLength = groups[id].getNameBytes().length + 1;
-        final int length = nameLength + p1.length + 1 + p2.length + 1 + p3.length + 1 + p4.length;
-        System.arraycopy(p1, 0, buffer, nameLength, p1.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length, 1);
-        System.arraycopy(p2, 0, buffer, nameLength + p1.length + 1, p2.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length, 1);
-        System.arraycopy(p3, 0, buffer, nameLength + p1.length + 1 + p2.length + 1, p3.length);
-        System.arraycopy(SEPARATOR, 0, buffer, nameLength + p1.length + 1 + p2.length + 1 + p3.length, 1);
-        System.arraycopy(p4, 0, buffer, nameLength + p1.length + 1 + p2.length + 1 + p3.length + 1, p4.length);
-
-        return baseFea(buffer, length);
-    }
-    /**
-     * One single-value feature per element of {@code list}. Null results are
-     * dropped because the result list is a {@code NullRejectingArrayList}; the
-     * same holds for every list-returning overload in this class.
-     */
-    public List<BaseFeature> makeFea(int id, byte[][] list) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(list.length);
-        for (byte[] t: list) {
-            result.add(makeFea(id, t));
-        }
-        return result;
-    }
-    /** One two-part feature per element of {@code left}, each paired with {@code right}. */
-    public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
-        for (byte[] l: left) {
-            result.add(makeFea(id, l, right));
-        }
-        return result;
-    }
-    /** One three-part feature per element of {@code left}, followed by {@code right1}, {@code right2}. */
-    public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right1, byte[] right2) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
-        for (byte[] l: left) {
-            result.add(makeFea(id, l, right1, right2));
-        }
-        return result;
-    }
-    /** One four-part feature per element of {@code left}, followed by the three right parts. */
-    public List<BaseFeature> makeFea(int id, byte[][] left, byte[] right1, byte[] right2, byte[] right3) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length);
-        for (byte[] l: left) {
-            result.add(makeFea(id, l, right1, right2, right3));
-        }
-        return result;
-    }
-    /** One two-part feature per element of {@code right}, each prefixed with {@code left}. */
-    public List<BaseFeature> makeFea(int id, byte[] left, byte[][] right) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
-        for (byte[] r : right) {
-            result.add(makeFea(id, left, r));
-        }
-        return result;
-    }
-    /** One three-part feature per element of {@code right}, prefixed with {@code left1}, {@code left2}. */
-    public List<BaseFeature> makeFea(int id, byte[] left1, byte[] left2, byte[][] right) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
-        for (byte[] r : right) {
-            result.add(makeFea(id, left1, left2, r));
-        }
-        return result;
-    }
-    /** One four-part feature per element of {@code right}, prefixed with the three left parts. */
-    public List<BaseFeature> makeFea(int id, byte[] left1, byte[] left2, byte[] left3, byte[][] right) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(right.length);
-        for (byte[] r : right) {
-            result.add(makeFea(id, left1, left2, left3, r));
-        }
-        return result;
-    }
-    /** One two-part feature for every (left element, right element) pair, left-major order. */
-    public List<BaseFeature> makeFea(int id, byte[][] left, byte[][] right) {
-        List<BaseFeature> result = new NullRejectingArrayList<BaseFeature>(left.length * right.length);
-        for (byte[] l: left) {
-            for (byte[] r: right) {
-                result.add(makeFea(id, l, r));
-            }
-        }
-        return result;
-    }
-}

+ 0 - 230
src/main/java/com/tzld/piaoquan/data/score/feature/FeatureHash.java

@@ -1,230 +0,0 @@
-package com.tzld.piaoquan.data.score.feature;
-
-
-import java.math.BigInteger;
-import java.nio.ByteBuffer;
-import java.nio.charset.Charset;
-/**
- * Static hashing helpers: MurmurHash-named 32/64-bit routines over byte arrays
- * or a ByteBuffer, plus String convenience wrappers that use seed 11.
- */
-public class FeatureHash {
-    public static Charset CharSetUTF8 = Charset.forName("UTF-8"); // NOTE(review): public and non-final, so reassignable
-    /**
-     * Packs four bytes with {@code a} as the lowest byte and {@code d} as the highest.
-     * {@code d << 24} is not masked, so the int result is negative when {@code d}
-     * is negative and is sign-extended on the widening to long.
-     */
-    public static long getUInt32(byte a, byte b, byte c, byte d) {
-        return (d << 24 | (c & 0xFF) << 16 | (b & 0xFF) << 8 | (a & 0xFF));
-    }
-    /** Hashes the whole array through {@code MurmurHash64A} with seed 11. */
-    public static long hash64(byte[] data) {
-        return MurmurHash64A(ByteBuffer.wrap(data), 0, data.length, 11L);
-    }
-    /**
-     * 64-bit hash over {@code len} bytes of {@code buffer}, read eight at a time
-     * with the absolute {@code getLong} starting at {@code from}; the words are
-     * therefore decoded in the buffer's current byte order.
-     * NOTE(review): the trailing-byte reads index from {@code len & ~7} without
-     * adding {@code from}, so a non-zero {@code from} looks mishandled — confirm.
-     * NOTE(review): the per-word mix uses the signed shift {@code k >> r} while
-     * the final mix uses {@code >>>} — confirm this is intended.
-     */
-    public static long MurmurHash64A(ByteBuffer buffer, int from, int len, long seed) {
-        final long m = 0xc6a4a7935bd1e995L;
-        final int r = 47;
-
-        long h = (seed) ^ (len * m);
-        int longLength = len / 8;
-
-        for (int i = 0; i < longLength; ++i) {
-            final int bytePos = from + i * 8;
-            long k = buffer.getLong(bytePos);
-
-            k *= m;
-            k ^= k >> r;
-            k *= m;
-            h ^= k;
-            h *= m;
-        }
-
-        final int remainingPos = len & ~7;
-        switch (len % 8) { // no break statements: each case falls through to the ones below it
-            case 7: h ^= (long)(buffer.get(remainingPos + 6) & 0xFF) << 48;
-            case 6: h ^= (long)(buffer.get(remainingPos + 5) & 0xFF) << 40;
-            case 5: h ^= (long)(buffer.get(remainingPos + 4) & 0xFF) << 32;
-            case 4: h ^= (long)(buffer.get(remainingPos + 3) & 0xFF) << 24;
-            case 3: h ^= (long)(buffer.get(remainingPos + 2) & 0xFF) << 16;
-            case 2: h ^= (long)(buffer.get(remainingPos + 1) & 0xFF) << 8;
-            case 1:
-                h ^= (long)(buffer.get(remainingPos) & 0xFF);
-                h *= m;
-        }
-
-        h ^= h >>> r;
-        h *= m;
-        h ^= h >>> r;
-        return h;
-    }
-    /**
-     * 32-bit hash of the first {@code len} bytes of {@code data}; the arithmetic
-     * is done in longs and masked back to 32 bits after each multiply.
-     * NOTE(review): the trailing bytes are shifted without {@code & 0xFF}, unlike
-     * the two MurmurHash64 methods of this class, so negative bytes sign-extend.
-     */
-    public static long MurmurHash32(byte data[], int len, long seed) {
-        long m = 0x5bd1e995L;
-        int r = 24;
-
-        long h = seed ^ len;
-
-        int offset = 0;
-        while (len >= 4) {
-            long k = getUInt32(data[offset], data[offset + 1], data[offset + 2], data[offset + 3]);
-
-            k *= m;
-            k &= 0xFFFFFFFFL;
-            k ^= k >> r;
-            k *= m;
-            k &= 0xFFFFFFFFL;
-
-            h *= m;
-            h &= 0xFFFFFFFFL;
-            h ^= k;
-
-            offset += 4;
-            len -= 4;
-        }
-
-        // Handle the last few bytes of the input array
-        switch (len) { // cases fall through
-            case 3: h ^= data[offset + 2] << 16;
-            case 2: h ^= data[offset + 1] << 8;
-            case 1: h ^= data[offset];
-                h *= m;
-                h &= 0xFFFFFFFFL;
-        } ;
-
-        // Do a few final mixes of the hash to ensure the last few
-        // bytes are well-incorporated.
-
-        h ^= h >> 13;
-        h *= m;
-        h &= 0xFFFFFFFFL;
-        h ^= h >> 15;
-
-        return h;
-    }
-    /**
-     * 64-bit hash for 32-bit platforms: hashes {@code len} bytes of {@code buffer}
-     * starting at {@code start} in two 32-bit lanes {@code h1}/{@code h2}, each
-     * kept in a long and masked to 32 bits, and returns {@code h1 << 32 | h2}.
-     */
-    public static long MurmurHash64(byte[] buffer, int start, int len, long seed) {
-        final long m = 0x5bd1e995L;
-        final int r = 24;
-        final int original = len; // only referenced by the commented-out debug print near the end
-
-        long h1 = (seed) ^ len;
-        long h2 = (seed >> 32);
-
-        int offset = start;
-        while (len >= 8) {
-            long k1 = getUInt32(buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3]);
-            // long k1 = buffer.getInt(offset);
-
-            k1 *= m; k1 &= 0xFFFFFFFFL; k1 ^= k1 >> r; k1 *= m; k1 &= 0xFFFFFFFFL;
-            h1 *= m; h1 &= 0xFFFFFFFFL; h1 ^= k1;
-            offset += 4;
-
-            long k2 = getUInt32(buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3]);
-            // long k2 = buffer.getInt(offset);
-            k2 *= m; k2 &= 0xFFFFFFFFL; k2 ^= k2 >> r; k2 *= m; k2 &= 0xFFFFFFFFL;
-            h2 *= m; h2 &= 0xFFFFFFFFL; h2 ^= k2;
-
-            offset += 4;
-            len -= 8;
-        }
-
-        if (len >= 4) {
-            long k1 = getUInt32(buffer[offset], buffer[offset + 1], buffer[offset + 2], buffer[offset + 3]);
-            // long k1 = buffer.getInt(offset);
-            k1 *= m; k1 &= 0xFFFFFFFFL; k1 ^= k1 >> r; k1 *= m; k1 &= 0xFFFFFFFFL;
-            h1 *= m; h1 &= 0xFFFFFFFFL; h1 ^= k1;
-            offset += 4;
-            len -= 4;
-        }
-
-        switch (len) { // cases fall through; 0 to 3 bytes remain here
-            case 3: h2 ^= (buffer[offset + 2] & 0xFF) << 16;
-            case 2: h2 ^= (buffer[offset + 1] & 0xFF) << 8;
-            case 1: h2 ^= (buffer[offset] & 0xFF);
-                h2 *= m;
-                h2 &= 0xFFFFFFFFL;
-        } ;
-
-        h1 ^= h2 >> 18;
-        h1 *= m; h1 &= 0xFFFFFFFFL;
-        h2 ^= h1 >> 22;
-        h2 *= m; h2 &= 0xFFFFFFFFL;
-        h1 ^= h2 >> 17;
-        h1 *= m; h1 &= 0xFFFFFFFFL;
-        h2 ^= h1 >> 19;
-        h2 *= m; h2 &= 0xFFFFFFFFL;
-
-        /*BigInteger ans = BigInteger.valueOf(h1).shiftLeft(32).or(BigInteger.valueOf(h2));
-        return ans.longValue();*/
-        //System.err.println("feature: " + new String(buffer, 0, original) + " length: " + original + " hash: " + (h1 << 32 | h2) + " daze");
-        return h1 << 32 | h2;
-    }
-    /**
-     * 64-bit hash for 32-bit platforms: same two-lane computation as the
-     * long-returning overload, always starting at offset 0, with the result
-     * assembled as a BigInteger from the two masked 32-bit lanes.
-     */
-    public static BigInteger MurmurHash64(byte data[], int len, long seed) {
-        long m = 0x5bd1e995L;
-        int r = 24;
-
-        long h1 = (seed) ^ len;
-        long h2 = (seed >> 32);
-
-        int offset = 0;
-        while (len >= 8) {
-            long k1 = getUInt32(data[offset], data[offset + 1], data[offset + 2], data[offset + 3]);
-            k1 *= m; k1 &= 0xFFFFFFFFL; k1 ^= k1 >> r; k1 *= m; k1 &= 0xFFFFFFFFL;
-            h1 *= m; h1 &= 0xFFFFFFFFL; h1 ^= k1;
-
-            long k2 = getUInt32(data[offset + 4], data[offset + 5], data[offset + 6], data[offset + 7]);
-            k2 *= m; k2 &= 0xFFFFFFFFL; k2 ^= k2 >> r; k2 *= m; k2 &= 0xFFFFFFFFL;
-            h2 *= m; h2 &= 0xFFFFFFFFL; h2 ^= k2;
-
-            offset += 8;
-            len -= 8;
-        }
-
-        if (len >= 4) {
-            long k1 = getUInt32(data[offset], data[offset + 1], data[offset + 2], data[offset + 3]);
-            k1 *= m; k1 &= 0xFFFFFFFFL; k1 ^= k1 >> r; k1 *= m; k1 &= 0xFFFFFFFFL;
-            h1 *= m; h1 &= 0xFFFFFFFFL; h1 ^= k1;
-            offset += 4;
-            len -= 4;
-        }
-
-        switch (len) { // cases fall through; 0 to 3 bytes remain here
-            case 3: h2 ^= (data[offset + 2] & 0xFF) << 16;
-            case 2: h2 ^= (data[offset + 1] & 0xFF) << 8;
-            case 1: h2 ^= (data[offset] & 0xFF);
-                h2 *= m;
-                h2 &= 0xFFFFFFFFL;
-        } ;
-
-        h1 ^= h2 >> 18;
-        h1 *= m; h1 &= 0xFFFFFFFFL;
-        h2 ^= h1 >> 22;
-        h2 *= m; h2 &= 0xFFFFFFFFL;
-        h1 ^= h2 >> 17;
-        h1 *= m; h1 &= 0xFFFFFFFFL;
-        h2 ^= h1 >> 19;
-        h2 *= m; h2 &= 0xFFFFFFFFL;
-
-        BigInteger ans = BigInteger.valueOf(h1).shiftLeft(32).or(BigInteger.valueOf(h2));
-        return ans;
-    }
-    /** Hashes the UTF-8 bytes of {@code input} with seed 11 and returns the BigInteger's decimal string. */
-    public static String hash(String input) {
-        byte[] tt = input.getBytes(CharSetUTF8);
-        return MurmurHash64(tt, tt.length, 11L).toString();
-    }
-    /**
-     * Same hash as {@link #hash(String)} but returned through
-     * {@code BigInteger.longValue()}, so the value can be negative.
-     */
-    public static Long hashToLong(String input) {
-        byte[] tt = input.getBytes(CharSetUTF8);
-        return MurmurHash64(tt, tt.length, 11L).longValue();
-    }
-
-    /** the constant 2^64 */
-    private static final BigInteger TWO_64 = BigInteger.ONE.shiftLeft(64);
-    /** Formats {@code l} as an unsigned decimal string by adding 2^64 to negative values. */
-    public static String asUnsignedLongString(long l) {
-        BigInteger b = BigInteger.valueOf(l);
-        if (b.signum() < 0) {
-            b = b.add(TWO_64);
-        }
-        return b.toString();
-    }
-}

+ 0 - 67
src/main/java/com/tzld/piaoquan/data/score/feature/LRBytesFeatureExtractorBase.java

@@ -1,67 +0,0 @@
-package com.tzld.piaoquan.data.score.feature;
-
-import com.google.common.collect.ArrayListMultimap;
-import com.google.common.collect.ListMultimap;
-import com.tzld.piaoquan.data.base.RequestContextBytesFeature;
-import com.tzld.piaoquan.data.base.UserBytesFeature;
-import com.tzld.piaoquan.data.base.VideoBytesFeature;
-import com.tzld.piaoquan.data.base.VlogFeatureGroup;
-import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
-import com.tzld.piaoquan.recommend.server.gen.recommend.FeatureGroup;
-import com.tzld.piaoquan.recommend.server.gen.recommend.LRSamples;
-
-import java.util.List;
-
-/**
- * Base class for extractors that collect hashed features per feature group.
- * It creates one {@code BytesGroup} per {@code VlogFeatureGroup} constant, wraps
- * them in a {@code BytesUtils}, and accumulates results in {@code features}.
- */
-public abstract class LRBytesFeatureExtractorBase {
-    private static final double DEFAULT_USER_CTR_GROUP = 10.0; // not referenced in this class
-    private static final double DEFAULT_ARTICLE_CTR_GROUP = 100.0; // not referenced in this class
-
-
-    private BytesUtils utils;
-    //Feature Group & Features
-    ListMultimap<FeatureGroup, BaseFeature> features = ArrayListMultimap.create();
-    int groupCount;
-
-    /** Builds the BytesGroup table, indexed by each constant's ordinal, and the BytesUtils over it. */
-    LRBytesFeatureExtractorBase() {
-        groupCount = VlogFeatureGroup.values().length;
-        BytesGroup[] groups = new BytesGroup[groupCount];
-        for (VlogFeatureGroup g: VlogFeatureGroup.values()) {
-            groups[g.ordinal()] = new BytesGroup(g.ordinal(),
-                    g.getGroupName(), g.getGroupNameBytes());
-        }
-        utils = new BytesUtils(groups);
-    }
-    /** Builds the protobuf FeatureGroup for {@code group}: type "1", lower-cased name, ordinal as id. */
-    private FeatureGroup makeGroup(VlogFeatureGroup group){
-        FeatureGroup.Builder g = FeatureGroup.newBuilder();
-        g.setType("1");
-        g.setName(group.getGroupName());
-        g.setId(group.ordinal());
-        return g.build();
-    };
-
-    /**
-     * Hashes a single value for {@code group} and records it in {@code features}.
-     * NOTE(review): {@code utils.makeFea} returns null when {@code value} is null,
-     * and that null is put into the multimap as-is — confirm consumers cope.
-     */
-    void makeFea(VlogFeatureGroup group, byte[] value) {
-        FeatureGroup featureGroup = makeGroup(group);
-        BaseFeature feature = utils.makeFea(group.ordinal(), value);
-        features.put(featureGroup, feature);
-    }
-    /** Hashes every element of {@code list} for {@code group} and records all results in {@code features}. */
-    void makeFea(VlogFeatureGroup group, byte[][] list) {
-        FeatureGroup g = makeGroup(group);
-        List<BaseFeature> featureList = utils.makeFea(group.ordinal(), list);
-        features.putAll(g, featureList);
-    }
-    /** Returns the live multimap of collected features (no copy is made). */
-    public ListMultimap<FeatureGroup, BaseFeature> getFeatures() {
-        return features;
-    }
-    /** Produces one {@code LRSamples} from the user, video and request-context byte features. */
-    public abstract LRSamples single(UserBytesFeature userBytesFeature,
-                                     VideoBytesFeature videoBytesFeature,
-                                     RequestContextBytesFeature requestContextBytesFeature);
-
-
-}

+ 0 - 152
src/main/java/com/tzld/piaoquan/data/score/feature/VlogShareLRFeatureExtractor.java

@@ -1,152 +0,0 @@
-package com.tzld.piaoquan.data.score.feature;
-
-import com.tzld.piaoquan.data.base.*;
-import com.tzld.piaoquan.recommend.server.gen.recommend.*;
-
-import java.util.ArrayList;
-import java.util.List;
-/**
- * Extractor that turns request-context, user and video byte features into
- * hashed features and packs them into an {@code LRSamples} message.
- */
-public class VlogShareLRFeatureExtractor extends LRBytesFeatureExtractorBase {
-    /** Delegates to the base-class constructor. */
-    public VlogShareLRFeatureExtractor() {
-        super();
-    }
-    /**
-     * Emits the device-info and day/week/hour features of the request context.
-     * TODO: add the remaining context features to be extracted.
-     */
-    public void getContextFeatures(RequestContextBytesFeature requestContextBytes) {
-        makeFea(VlogFeatureGroup.MACHINEINFO_BRAND, requestContextBytes.getMachineinfo_brand());
-        makeFea(VlogFeatureGroup.MACHINEINFO_MODEL, requestContextBytes.getMachineinfo_model());
-        makeFea(VlogFeatureGroup.MACHINEINFO_PLATFORM, requestContextBytes.getMachineinfo_platform());
-        makeFea(VlogFeatureGroup.MACHINEINFO_SDKVERSION, requestContextBytes.getMachineinfo_sdkversion());
-        makeFea(VlogFeatureGroup.MACHINEINFO_SYSTEM, requestContextBytes.getMachineinfo_system());
-        makeFea(VlogFeatureGroup.MACHINEINFO_WECHATVERSION, requestContextBytes.getMachineinfo_brand()); // NOTE(review): reads the brand getter for the WECHATVERSION group — likely copy-paste, confirm
-
-        makeFea(VlogFeatureGroup.DAY, requestContextBytes.getWeek()); // NOTE(review): DAY is fed from getWeek(), same as WEEK below — confirm
-        makeFea(VlogFeatureGroup.WEEK, requestContextBytes.getWeek());
-        makeFea(VlogFeatureGroup.HOUR, requestContextBytes.getHour());
-
-    }
-    /**
-     * Emits the user bucket features and the exp/click/share/return/ctr/str/rov/ros
-     * entries of the user's 1-day, 3-day, 7-day and 3-month feature maps.
-     * TODO (marker kept from the original, which gave no further detail).
-     */
-    public void getUserFeatures(UserBytesFeature user) {
-        makeFea(VlogFeatureGroup.USER_CYCLE_BUCKET_7DAY, user.getUser_cycle_bucket_7days());
-        makeFea(VlogFeatureGroup.USER_SHARE_BUCKET_30DAY, user.getUser_share_bucket_30days());
-        makeFea(VlogFeatureGroup.USER_SHARE_BUCKET_30DAY, user.getUser_cycle_bucket_30days()); // NOTE(review): cycle_bucket_30days goes into the SHARE group again; USER_CYCLE_BUCKET_30DAY is never used here — confirm
-
-        // 1day features
-        makeFea(VlogFeatureGroup.USER_1DAY_EXP, user.getDay1_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.USER_1DAY_CLICK, user.getDay1_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.USER_1DAY_SHARE, user.getDay1_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.USER_1DAY_RETURN, user.getDay1_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.USER_1DAY_CTR, user.getDay1_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.USER_1DAY_STR, user.getDay1_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.USER_1DAY_ROV, user.getDay1_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.USER_1DAY_ROS, user.getDay1_cnt_features().get("ros"));
-
-        // 3day features
-        makeFea(VlogFeatureGroup.USER_3DAY_EXP, user.getDay3_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.USER_3DAY_CLICK, user.getDay3_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.USER_3DAY_SHARE, user.getDay3_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.USER_3DAY_RETURN, user.getDay3_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.USER_3DAY_CTR, user.getDay3_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.USER_3DAY_STR, user.getDay3_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.USER_3DAY_ROV, user.getDay3_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.USER_3DAY_ROS, user.getDay3_cnt_features().get("ros"));
-
-        // 7day features
-        makeFea(VlogFeatureGroup.USER_7DAY_EXP, user.getDay7_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.USER_7DAY_CLICK, user.getDay7_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.USER_7DAY_SHARE, user.getDay7_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.USER_7DAY_RETURN, user.getDay7_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.USER_7DAY_CTR, user.getDay7_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.USER_7DAY_STR, user.getDay7_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.USER_7DAY_ROV, user.getDay7_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.USER_7DAY_ROS, user.getDay7_cnt_features().get("ros"));
-
-        // 3month features
-        makeFea(VlogFeatureGroup.USER_3MONTH_EXP, user.getMonth3_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_CLICK, user.getMonth3_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_SHARE, user.getMonth3_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_RETURN, user.getMonth3_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_CTR, user.getMonth3_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_STR, user.getMonth3_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_ROV, user.getMonth3_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.USER_3MONTH_ROS, user.getMonth3_cnt_features().get("ros"));
-
-    }
-    /**
-     * Emits the video id, uploader id and the exp/click/share/return/ctr/str/rov/ros
-     * entries for the item's 1-day, 3-day, 7-day and 3-month groups.
-     */
-    public void getItemFeature(VideoBytesFeature item) {
-        makeFea(VlogFeatureGroup.VIDEOID, item.getVideoId());
-        makeFea(VlogFeatureGroup.UP_ID, item.getUpId());
-        // 1day features
-        makeFea(VlogFeatureGroup.ITEM_1DAY_EXP, item.getItem_day1_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_CLICK, item.getItem_day1_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_SHARE, item.getItem_day1_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_RETURN, item.getItem_day1_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_CTR, item.getItem_day1_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_STR, item.getItem_day1_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_ROV, item.getItem_day1_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.ITEM_1DAY_ROS, item.getItem_day1_cnt_features().get("ros"));
-
-        // 3day features
-        // NOTE(review): every ITEM_3DAY_* group below reads getItem_day1_cnt_features() — likely copy-paste from the 1-day block, confirm
-        makeFea(VlogFeatureGroup.ITEM_3DAY_EXP, item.getItem_day1_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_CLICK, item.getItem_day1_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_SHARE, item.getItem_day1_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_RETURN, item.getItem_day1_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_CTR, item.getItem_day1_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_STR, item.getItem_day1_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_ROV, item.getItem_day1_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.ITEM_3DAY_ROS, item.getItem_day1_cnt_features().get("ros"));
-
-        // 7day features
-        makeFea(VlogFeatureGroup.ITEM_7DAY_EXP, item.getItem_day7_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_CLICK, item.getItem_day7_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_SHARE, item.getItem_day7_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_RETURN, item.getItem_day7_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_CTR, item.getItem_day7_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_STR, item.getItem_day7_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_ROV, item.getItem_day7_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.ITEM_7DAY_ROS, item.getItem_day7_cnt_features().get("ros"));
-
-        // 3month features
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_EXP, item.getItem_month3_cnt_features().get("exp"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_CLICK, item.getItem_month3_cnt_features().get("click"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_SHARE, item.getItem_month3_cnt_features().get("share"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_RETURN, item.getItem_month3_cnt_features().get("return"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_CTR, item.getItem_month3_cnt_features().get("ctr"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_STR, item.getItem_month3_cnt_features().get("str"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_ROV, item.getItem_month3_cnt_features().get("rov"));
-        makeFea(VlogFeatureGroup.ITEM_3MONTH_ROS, item.getItem_month3_cnt_features().get("ros"));
-
-    }
-    /**
-     * Clears the shared {@code features} map, runs the user, context and item
-     * extraction in that order, then emits one {@code GroupedFeature} per distinct
-     * group key. {@code groupNum} is the number of enum constants, and
-     * {@code count} is the total number of features added.
-     */
-    @Override
-    public synchronized LRSamples single(UserBytesFeature userBytesFeature,
-                                         VideoBytesFeature videoBytesFeature,
-                                         RequestContextBytesFeature requestContextBytesFeature) {
-        features.clear();
-        // extract features
-        getUserFeatures(userBytesFeature);
-        getContextFeatures(requestContextBytesFeature);
-        getItemFeature(videoBytesFeature);
-
-        LRSamples.Builder lr = com.tzld.piaoquan.recommend.server.gen.recommend.LRSamples.newBuilder();
-        lr.setGroupNum(groupCount);
-        List<FeatureGroup> keys = new ArrayList<>(features.keySet());
-        int count = 0;
-        for(FeatureGroup group : keys) {
-            List<BaseFeature> fea = features.get(group);
-            GroupedFeature.Builder gf = GroupedFeature.newBuilder();
-            gf.setGroup(group);
-            gf.setCount(fea.size());
-            gf.addAllFeatures(fea);
-            count += fea.size();
-            lr.addFeatures(gf);
-        }
-        lr.setCount(count);
-        return lr.build();
-    }
-
-
-}

+ 1 - 22
src/main/java/examples/dataloader/AdRedisFeatureConstructor.java

@@ -7,10 +7,7 @@ import com.aliyun.odps.data.Record;
 import com.tzld.piaoquan.ad.engine.commons.base.AdActionFeature;
 import com.tzld.piaoquan.ad.engine.commons.base.AdItemFeature;
 import com.tzld.piaoquan.ad.engine.commons.base.UserAdFeature;
-import com.tzld.piaoquan.data.base.ItemFeature;
-import com.tzld.piaoquan.data.base.RequestContext;
-import com.tzld.piaoquan.data.base.UserActionFeature;
-import com.tzld.piaoquan.data.base.UserFeature;
+
 
 import java.util.HashMap;
 import java.util.Map;
@@ -31,24 +28,6 @@ public class AdRedisFeatureConstructor {
     private static final Account account = new AliyunAccount(ODPS_CONFIG.get("ACCESSID"), ODPS_CONFIG.get("ACCESSKEY"));
 
 
-    public static RequestContext constructRequestContext(Record record) { // copies the record's request-context string columns into a new RequestContext
-        RequestContext requestContext = new RequestContext();
-        requestContext.setApptype(record.getString("apptype"));
-        requestContext.setMachineinfo_brand(record.getString("machineinfo_brand"));
-        requestContext.setMachineinfo_model(record.getString("machineinfo_model"));
-        requestContext.setMachineinfo_platform(record.getString("machineinfo_platform"));
-        requestContext.setMachineinfo_sdkversion(record.getString("machineinfo_sdkversion"));
-        requestContext.setMachineinfo_system(record.getString("machineinfo_system"));
-        requestContext.setMachineinfo_wechatversion(record.getString("machineinfo_wechatversion"));
-        requestContext.setDay(record.getString("ctx_day")); // day, week, hour, region and city are read from ctx_-prefixed columns
-        requestContext.setWeek(record.getString("ctx_week"));
-        requestContext.setHour(record.getString("ctx_hour"));
-        requestContext.setRegion(record.getString("ctx_region"));
-        requestContext.setCity(record.getString("ctx_city"));
-        return requestContext;
-    }
-
-
     public static UserAdFeature constructUserFeature(Record record) {
         UserAdFeature userFeature = new UserAdFeature();
         userFeature.setMid(record.getString("mids"));

+ 1 - 34
src/main/java/examples/dataloader/RecommRedisFeatureConstructor.java

@@ -4,46 +4,13 @@ package examples.dataloader;
 import com.aliyun.odps.account.Account;
 import com.aliyun.odps.account.AliyunAccount;
 import com.aliyun.odps.data.Record;
-import com.tzld.piaoquan.data.base.ItemFeature;
-import com.tzld.piaoquan.data.base.RequestContext;
-import com.tzld.piaoquan.data.base.UserActionFeature;
-import com.tzld.piaoquan.data.base.UserFeature;
+import com.tzld.piaoquan.recommend.feature.domain.video.base.*;
 
 import java.util.HashMap;
 import java.util.Map;
 
 public class RecommRedisFeatureConstructor {
 
-    private static final String BUCKET_NAME = "ali-recommend";
-    private static final Map<String, String> ODPS_CONFIG = new HashMap<String, String>();
-
-    static {
-        ODPS_CONFIG.put("ENDPOINT", "http://service.cn.maxcompute.aliyun.com/api");
-        ODPS_CONFIG.put("ACCESSID", "LTAIWYUujJAm7CbH");
-        ODPS_CONFIG.put("ACCESSKEY", "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P");
-    }
-
-    ;
-
-    private static final Account account = new AliyunAccount(ODPS_CONFIG.get("ACCESSID"), ODPS_CONFIG.get("ACCESSKEY"));
-
-    /**
-     * Copies the record's request-context string columns into a new RequestContext.
-     * The app type and machineinfo_* columns are read under their own names; day,
-     * week, hour, region and city are read from the ctx_-prefixed columns.
-     */
-    public static RequestContext constructRequestContext(Record record) {
-        RequestContext requestContext = new RequestContext();
-        requestContext.setApptype(record.getString("apptype"));
-        requestContext.setMachineinfo_brand(record.getString("machineinfo_brand"));
-        requestContext.setMachineinfo_model(record.getString("machineinfo_model"));
-        requestContext.setMachineinfo_platform(record.getString("machineinfo_platform"));
-        requestContext.setMachineinfo_sdkversion(record.getString("machineinfo_sdkversion"));
-        requestContext.setMachineinfo_system(record.getString("machineinfo_system"));
-        requestContext.setMachineinfo_wechatversion(record.getString("machineinfo_wechatversion"));
-        requestContext.setDay(record.getString("ctx_day"));
-        requestContext.setWeek(record.getString("ctx_week"));
-        requestContext.setHour(record.getString("ctx_hour"));
-        requestContext.setRegion(record.getString("ctx_region"));
-        requestContext.setCity(record.getString("ctx_city"));
-        return requestContext;
-    }
 
 
     public static UserFeature constructUserFeature(Record record) {

+ 3 - 5
src/main/java/examples/dataloader/RecommendSampleConstructor.java

@@ -4,10 +4,8 @@ package examples.dataloader;
 import com.aliyun.odps.account.Account;
 import com.aliyun.odps.account.AliyunAccount;
 import com.aliyun.odps.data.Record;
-import com.tzld.piaoquan.data.base.ItemFeature;
-import com.tzld.piaoquan.data.base.RequestContext;
-import com.tzld.piaoquan.data.base.UserActionFeature;
-import com.tzld.piaoquan.data.base.UserFeature;
+import com.tzld.piaoquan.recommend.feature.domain.video.base.*;
+
 
 import java.util.HashMap;
 import java.util.Map;
@@ -48,7 +46,7 @@ public class RecommendSampleConstructor {
 
     public static UserFeature constructUserFeature(Record record) {
         UserFeature userFeature = new UserFeature();
-        userFeature.setUid(record.get("uid").toString());
+        userFeature.setUid(record.get("mid").toString());
         userFeature.setUser_cycle_bucket_7days(record.getString("u_cycle_bucket_7days"));
         userFeature.setUser_cycle_bucket_30days(record.getString("u_cycle_bucket_30days"));
         userFeature.setUser_share_bucket_30days(record.getString("u_share_bucket_30days"));

+ 0 - 1
src/main/java/examples/sparksql/SparkAdCTRSampleLoader.java

@@ -4,7 +4,6 @@ import com.aliyun.odps.TableSchema;
 import com.aliyun.odps.data.Record;
 import com.google.common.collect.ListMultimap;
 import com.tzld.piaoquan.ad.engine.commons.base.*;
-import com.tzld.piaoquan.data.base.*;
 import com.tzld.piaoquan.ad.engine.commons.score.feature.VlogAdCtrLRFeatureExtractor;
 import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
 import com.tzld.piaoquan.recommend.server.gen.recommend.FeatureGroup;

+ 5 - 4
src/main/java/examples/sparksql/SparkShareRatioSampleLoader.java

@@ -3,11 +3,12 @@ package examples.sparksql;
 import com.aliyun.odps.TableSchema;
 import com.aliyun.odps.data.Record;
 import com.google.common.collect.ListMultimap;
-import com.tzld.piaoquan.data.base.*;
+import com.tzld.piaoquan.recommend.feature.domain.video.base.*;
+
 import examples.dataloader.RecommendSampleConstructor;
-import com.tzld.piaoquan.data.score.feature.VlogShareLRFeatureExtractor;
-import com.tzld.piaoquan.recommend.server.gen.recommend.BaseFeature;
-import com.tzld.piaoquan.recommend.server.gen.recommend.FeatureGroup;
+import com.tzld.piaoquan.recommend.feature.domain.video.feature.VlogShareLRFeatureExtractor;
+import com.tzld.piaoquan.recommend.feature.gen.recommend.BaseFeature;
+import com.tzld.piaoquan.recommend.feature.gen.recommend.FeatureGroup;
 import org.apache.spark.SparkConf;
 import org.apache.spark.aliyun.odps.OdpsOps;
 import org.apache.spark.api.java.JavaRDD;

+ 0 - 74
src/main/java/examples/sparksql/SparkUserFeaToRedisLoader.java

@@ -1,74 +0,0 @@
-package examples.sparksql;
-
-import com.aliyun.odps.data.Record;
-import com.tzld.piaoquan.data.base.*;
-import examples.dataloader.RecommRedisFeatureConstructor;
-import org.apache.spark.SparkConf;
-import org.apache.spark.aliyun.odps.OdpsOps;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
-import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
-import org.springframework.data.redis.core.RedisTemplate;
-import org.springframework.data.redis.serializer.StringRedisSerializer;
-
-import java.util.HashMap;
-import java.util.Map;
-
-
-public class SparkUserFeaToRedisLoader {
-
-    private static final String userKeyFormat = "user:%s";
-
-    public static RedisTemplate<String, String> buildRedisTemplate() {
-        RedisStandaloneConfiguration rsc = new RedisStandaloneConfiguration();
-        rsc.setPort(6379);
-        rsc.setPassword("Wqsd@2019");
-        rsc.setHostName("r-bp1ps6my7lzg8rdhwx682.redis.rds.aliyuncs.com");
-        RedisTemplate<String, String> template = new RedisTemplate<>();
-        JedisConnectionFactory fac = new JedisConnectionFactory(rsc);
-        fac.afterPropertiesSet();
-        template.setDefaultSerializer(new StringRedisSerializer());
-        template.setConnectionFactory(fac);
-        template.afterPropertiesSet();
-        return template;
-    }
-
-    public static void loadFeatureToRedis(RedisTemplate<String, String> redisTemplate, Record line) {
-        Map<String, String> userFeaRedisFormat = new HashMap<String, String>();
-        UserFeature userFeature = RecommRedisFeatureConstructor.constructUserFeature(line);
-        String key = String.format(userKeyFormat, userFeature.getKey());
-        String value = userFeature.getValue();
-        userFeaRedisFormat.put(key, value);
-        redisTemplate.opsForValue().multiSet(userFeaRedisFormat);
-    }
-
-
-    public static void main(String[] args) {
-
-        String partition = args[0];
-        String accessId = "LTAIWYUujJAm7CbH";
-        String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
-        String odpsUrl = "http://service.odps.aliyun.com/api";
-        String tunnelUrl = "http://dt.cn-hangzhou.maxcompute.aliyun-inc.com";
-        String project = "loghubods";
-        String table = "alg_recsys_user_info";
-
-        SparkConf sparkConf = new SparkConf().setAppName("E-MapReduce Demo 3-2: Spark MaxCompute Demo (Java)");
-        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
-        OdpsOps odpsOps = new OdpsOps(jsc.sc(), accessId, accessKey, odpsUrl, tunnelUrl);
-        System.out.println("Read odps table...");
-
-        JavaRDD<Record> readData = odpsOps.readTableWithJava(project, table, partition, null, Integer.valueOf(10));
-
-        readData.sample(false, 0.0001).foreachPartition(
-                rowIterator -> {
-                    RedisTemplate<String, String> redisTemplate = buildRedisTemplate();
-                    rowIterator.forEachRemaining(line -> loadFeatureToRedis(redisTemplate, line));
-                }
-        );
-    }
-
-
-
-}

+ 124 - 0
src/main/java/examples/sparksql/SparkVideoFeaToRedisLoader.java

@@ -0,0 +1,124 @@
+package examples.sparksql;
+
+import com.aliyun.odps.TableSchema;
+import com.aliyun.odps.data.Record;
+
+import com.tzld.piaoquan.recommend.feature.domain.video.base.ItemFeature;
+import com.tzld.piaoquan.recommend.feature.domain.video.base.UserFeature;
+import examples.dataloader.AdRedisFeatureConstructor;
+import examples.dataloader.RecommRedisFeatureConstructor;
+import org.apache.spark.SparkConf;
+import org.apache.spark.aliyun.odps.OdpsOps;
+import org.apache.spark.api.java.JavaRDD;
+import org.apache.spark.api.java.JavaSparkContext;
+import org.apache.spark.api.java.function.Function2;
+import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
+import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
+import org.springframework.data.redis.core.RedisTemplate;
+import org.springframework.data.redis.serializer.StringRedisSerializer;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+
+/** Spark job that copies video and user features from MaxCompute (ODPS) tables into Redis. */
+public class SparkVideoFeaToRedisLoader {
+
+    // NOTE(review): the "ad" prefixes look inherited from the ad loader; confirm readers of these keys expect them.
+    private static final String userKeyFormat = "user:ad:%s";
+    private static final String adKeyFormat = "ad:%s";
+
+    /**
+     * Builds a String-serialized RedisTemplate over a standalone Jedis connection.
+     * NOTE(review): host and password are hard-coded; move them to external configuration.
+     */
+    public static RedisTemplate<String, String> buildRedisTemplate() {
+        RedisStandaloneConfiguration rsc = new RedisStandaloneConfiguration();
+        rsc.setPort(6379);
+        rsc.setPassword("Wqsd@2019");
+        rsc.setHostName("r-bp1pi8wyv6lzvgjy5z.redis.rds.aliyuncs.com");
+        RedisTemplate<String, String> template = new RedisTemplate<>();
+        JedisConnectionFactory fac = new JedisConnectionFactory(rsc);
+        fac.afterPropertiesSet();
+        template.setDefaultSerializer(new StringRedisSerializer());
+        template.setConnectionFactory(fac);
+        template.afterPropertiesSet();
+        return template;
+    }
+
+    /** Writes one pair to Redis; {@code line} holds the key at index 0 and the value at index 1. */
+    public static void loadFeatureToRedis(RedisTemplate<String, String> redisTemplate, List<String> line) {
+        redisTemplate.opsForValue().set(line.get(0), line.get(1));
+    }
+
+    /** Wraps a key and a value into the two-element list consumed by loadFeatureToRedis. */
+    private static List<String> toKv(String key, String value) {
+        List<String> kv = new ArrayList<String>(2);
+        kv.add(key);
+        kv.add(value);
+        return kv;
+    }
+
+    /** Opens one Redis template for the partition, writes every pair, then releases the connection factory. */
+    private static void writePartition(java.util.Iterator<List<String>> rows) {
+        RedisTemplate<String, String> redisTemplate = buildRedisTemplate();
+        try {
+            rows.forEachRemaining(line -> loadFeatureToRedis(redisTemplate, line));
+        } finally {
+            // buildRedisTemplate() creates a dedicated factory per call; destroy it so connections do not leak.
+            ((JedisConnectionFactory) redisTemplate.getConnectionFactory()).destroy();
+        }
+    }
+
+    /** Maps a video-table record to a Redis key/value pair built from its ItemFeature. */
+    static class RecordsToAdRedisKV implements Function2<Record, TableSchema, List<String>> {
+        @Override
+        public List<String> call(Record record, TableSchema schema) throws Exception {
+            ItemFeature adItemFeature = RecommRedisFeatureConstructor.constructItemFeature(record);
+            return toKv(String.format(adKeyFormat, adItemFeature.getKey()), adItemFeature.getValue());
+        }
+    }
+
+    /** Maps a user-table record to a Redis key/value pair built from its UserFeature. */
+    static class RecordsToUserRedisKV implements Function2<Record, TableSchema, List<String>> {
+        @Override
+        public List<String> call(Record record, TableSchema schema) throws Exception {
+            UserFeature userFeature = RecommRedisFeatureConstructor.constructUserFeature(record);
+            return toKv(String.format(userKeyFormat, userFeature.getKey()), userFeature.getValue());
+        }
+    }
+
+    /**
+     * Entry point: loads a sample of both tables for the given partition into Redis.
+     * @param args args[0] is the ODPS partition spec read from both tables
+     */
+    public static void main(String[] args) {
+        if (args.length < 1) {
+            throw new IllegalArgumentException("Usage: SparkVideoFeaToRedisLoader <partition>");
+        }
+        String partition = args[0];
+        // NOTE(review): access keys are hard-coded; move them to external configuration.
+        String accessId = "LTAIWYUujJAm7CbH";
+        String accessKey = "RfSjdiWwED1sGFlsjXv0DlfTnZTG1P";
+        String odpsUrl = "http://service.odps.aliyun.com/api";
+        String tunnelUrl = "http://dt.cn-hangzhou.maxcompute.aliyun-inc.com";
+        String project = "loghubods";
+        String tableAdInfo = "alg_recsys_video_info";
+        String tableUserInfo = "alg_recsys_user_info";
+
+        SparkConf sparkConf = new SparkConf().setAppName("E-MapReduce Demo 3-2: Spark MaxCompute Demo (Java)");
+        JavaSparkContext jsc = new JavaSparkContext(sparkConf);
+        OdpsOps odpsOps = new OdpsOps(jsc.sc(), accessId, accessKey, odpsUrl, tunnelUrl);
+        System.out.println("Read odps table...");
+
+        // Video features: mapped with the mapper declared in this class (RecommRedisFeatureConstructor based).
+        JavaRDD<List<String>> readAdData = odpsOps.readTableWithJava(project, tableAdInfo, partition, new RecordsToAdRedisKV(), Integer.valueOf(10));
+        readAdData.sample(false, 0.001).foreachPartition(SparkVideoFeaToRedisLoader::writePartition);
+
+        // User features: mapped with the mapper declared in this class.
+        JavaRDD<List<String>> readUserData = odpsOps.readTableWithJava(project, tableUserInfo, partition, new RecordsToUserRedisKV(), Integer.valueOf(50));
+        readUserData.repartition(50).sample(false, 0.00001).foreachPartition(SparkVideoFeaToRedisLoader::writePartition);
+    }
+}