Sfoglia il codice sorgente

str 实时特征模型

zhangbo 1 anno fa
parent
commit
f31af0573b

+ 131 - 22
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4Rankv2Model.java

@@ -18,10 +18,11 @@ import org.apache.commons.lang3.math.NumberUtils;
 import org.springframework.data.redis.connection.RedisConnectionFactory;
 import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
 import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
-import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.data.redis.serializer.StringRedisSerializer;
+import org.springframework.data.redis.core.RedisTemplate;
 import org.springframework.stereotype.Service;
-
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorUserFeature;
+import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemFeature;
 import java.text.SimpleDateFormat;
 import java.util.*;
 import java.util.stream.Collectors;
@@ -37,6 +38,7 @@ public class RankStrategy4Rankv2Model extends RankService {
     @ApolloJsonValue("${video.model.weightv2:}")
     private Map<String, Double> mergeWeight;
     final private String CLASS_NAME = this.getClass().getSimpleName();
+
 //    public Video getTestVideo(Long id, String s){
 //        Video a1 = new Video();
 //        a1.setVideoId(id);
@@ -44,7 +46,6 @@ public class RankStrategy4Rankv2Model extends RankService {
 //        a1.setPushFrom("recall_pool_region_h");
 //        return a1;
 //    }
-
     @Override
     public List<Video> mergeAndRankRovRecall(RankParam param) {
 
@@ -58,6 +59,7 @@ public class RankStrategy4Rankv2Model extends RankService {
         rovRecallRank.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
         rovRecallRank.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
 
+        //-------------------地域内部去重+截断-------------------
         removeDuplicate(rovRecallRank);
         rovRecallRank = rovRecallRank.size() <= param.getSize()
                 ? rovRecallRank
@@ -66,6 +68,7 @@ public class RankStrategy4Rankv2Model extends RankService {
         //-------------------地域 sim returnv2 融合-------------------
         rovRecallRank.addAll(extractAndSort(param, SimHotVideoRecallStrategy.PUSH_FORM));
         rovRecallRank.addAll(extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM));
+        //-------------------地域 sim returnv2 去重-------------------
         removeDuplicate(rovRecallRank);
 
         //-------------------排-------------------
@@ -148,6 +151,10 @@ public class RankStrategy4Rankv2Model extends RankService {
         redisTemplate.setDefaultSerializer(new StringRedisSerializer());
         redisTemplate.afterPropertiesSet();
 
+        // 0: 场景特征处理
+        Map<String, String> sceneFeatureMap =  this.getSceneFeature(param);
+
+        // 1: user特征处理
         Map<String, String> userFeatureMap = new HashMap<>();
         if (param.getMid() != null && !param.getMid().isEmpty()){
             String midKey = "user_info_4video_" + param.getMid();
@@ -158,8 +165,7 @@ public class RankStrategy4Rankv2Model extends RankService {
                             new TypeToken<Map<String, String>>() {},
                             userFeatureMap);
                 }catch (Exception e){
-                    log.error(String.format("parse user json is wrong in {} with {}",
-                            this.CLASS_NAME, e));
+                    log.error(String.format("parse user json is wrong in {} with {}", this.CLASS_NAME, e));
                 }
             }else{
                 JSONObject obj = new JSONObject();
@@ -169,30 +175,39 @@ public class RankStrategy4Rankv2Model extends RankService {
             }
         }
         final Set<String> userFeatureSet = new HashSet<>(Arrays.asList(
-                "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
-                "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
-                "u_ctr_1day","u_str_1day","u_rov_1day","u_ros_1day",
-                "u_3day_exp_cnt","u_3day_click_cnt","u_3day_share_cnt","u_3day_return_cnt",
-                "u_ctr_3day","u_str_3day","u_rov_3day","u_ros_3day"
+            "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
+            "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+            "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
         ));
         Iterator<Map.Entry<String, String>> iterator = userFeatureMap.entrySet().iterator();
         while (iterator.hasNext()) {
             Map.Entry<String, String> entry = iterator.next();
             if (!userFeatureSet.contains(entry.getKey())) {
-                // 删除键值对
                 iterator.remove();
             }
         }
+        Map<String, String> f1 = RankExtractorUserFeature.getOriginFeature(userFeatureMap,
+                new HashSet<String>(Arrays.asList(
+                    "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system"
+                ))
+        );
+        Map<String, String> f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap);
+        Map<String, String> f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap,
+                new HashSet<String>(Arrays.asList(
+                    "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
+                    "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
+                ))
+        );
+        f1.putAll(f2);
+        f1.putAll(f3);
+        log.info("userFeature in model = {}", JSONUtils.toJson(f1));
 
-        log.info("userFeature in model = {}", JSONUtils.toJson(userFeatureMap));
-
+        // 2-1: item特征处理
         final Set<String> itemFeatureSet = new HashSet<>(Arrays.asList(
                 "total_time", "play_count_total",
                 "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
-                "i_ctr_1day", "i_str_1day", "i_rov_1day", "i_ros_1day",
-                "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt",
-                "i_ctr_3day", "i_str_3day", "i_rov_3day", "i_ros_3day"
-        ));
+                "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"
+                ));
 
         List<RankItem> rankItems = CommonCollectionUtils.toList(videos, RankItem::new);
         List<Long> videoIds = CommonCollectionUtils.toListDistinct(videos, Video::getVideoId);
@@ -212,20 +227,114 @@ public class RankStrategy4Rankv2Model extends RankService {
                     while (iteratorIn.hasNext()) {
                         Map.Entry<String, String> entry = iteratorIn.next();
                         if (!itemFeatureSet.contains(entry.getKey())) {
-                            // 删除键值对
                             iteratorIn.remove();
                         }
                     }
-                    rankItems.get(i).setFeatureMap(vfMap);
+                    Map<String, String> f4 = RankExtractorItemFeature.getItemRateFeature(vfMap);
+                    Map<String, String> f5 = RankExtractorItemFeature.cntFeatureChange(vfMap,
+                            new HashSet<String>(Arrays.asList(
+                            "total_time", "play_count_total",
+                            "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
+                            "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"))
+                    );
+                    f4.putAll(f5);
+                    rankItems.get(i).setFeatureMap(f4);
                 }catch (Exception e){
-                    log.error(String.format("parse video json is wrong in {} with {}",
-                            this.CLASS_NAME, e));
+                    log.error(String.format("parse video json is wrong in {} with {}", this.CLASS_NAME, e));
                 }
             }
         }
+        // 2-2: item 实时特征处理
+        List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
+        List<String> rtFeaPart = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
+        Calendar calendar = Calendar.getInstance();
+        String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
+        String hour = new SimpleDateFormat("HH").format(calendar.getTime());
+        String rtFeaPart1day = date + hour;
+        String rtFeaPart1h = date + hour;
+        if (rtFeaPart != null){
+            if (rtFeaPart.get(0) != null){
+                rtFeaPart1day = rtFeaPart.get(0);
+            }
+            if (rtFeaPart.get(1) != null){
+                rtFeaPart1h = rtFeaPart.get(1);
+            }
+        }
+
+        List<String> videoRtKeys1 = videoIds.stream().map(r-> "item_rt_fea_1day_" + r)
+                .collect(Collectors.toList());
+        List<String> videoRtKeys2 = videoIds.stream().map(r-> "item_rt_fea_1h_" + r)
+                .collect(Collectors.toList());
+        videoRtKeys1.addAll(videoRtKeys2);
+        List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys1);
+
+
+        if (videoRtFeatures != null){
+            int j = 0;
+            for (RankItem item: rankItems){
+                String vF = videoRtFeatures.get(j);
+                ++j;
+                if (vF == null){
+                    continue;
+                }
+                Map<String, String> vfMap = new HashMap<>();
+                Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
+                try{
+                    vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
+                    for (Map.Entry<String, String> entry : vfMap.entrySet()){
+                        String value = entry.getValue();
+                        if (value == null){
+                            continue;
+                        }
+                        String [] var1 = value.split(",");
+                        Map<String, Double> tmp = new HashMap<>();
+                        for (String var2 : var1){
+                            String [] var3 = var2.split(":");
+                            tmp.put(var3[0], Double.valueOf(var3[1]));
+                        }
+                        vfMapNew.put(entry.getKey(), tmp);
+                    }
+                }catch (Exception e){
+                    log.error(String.format("parse video item_rt_fea_1day_ json is wrong in {} with {}", this.CLASS_NAME, e));
+                }
+                Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1day);
+                item.getFeatureMap().putAll(f8);
+            }
+            for (RankItem item: rankItems){
+                String vF = videoRtFeatures.get(j);
+                ++j;
+                if (vF == null){
+                    continue;
+                }
+                Map<String, String> vfMap = new HashMap<>();
+                Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
+                try{
+                    vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
+                    for (Map.Entry<String, String> entry : vfMap.entrySet()){
+                        String value = entry.getValue();
+                        if (value == null){
+                            continue;
+                        }
+                        String [] var1 = value.split(",");
+                        Map<String, Double> tmp = new HashMap<>();
+                        for (String var2 : var1){
+                            String [] var3 = var2.split(":");
+                            tmp.put(var3[0], Double.valueOf(var3[1]));
+                        }
+                        vfMapNew.put(entry.getKey(), tmp);
+                    }
+                }catch (Exception e){
+                    log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}", this.CLASS_NAME, e));
+                }
+                Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1h);
+                item.getFeatureMap().putAll(f8);
+            }
+        }
+
+
         log.info("ItemFeature = {}", JSONUtils.toJson(videoFeatures));
 
-        Map<String, String> sceneFeatureMap =  this.getSceneFeature(param);
+
 
         List<RankItem> rovRecallScore = ScorerUtils.getScorerPipeline(ScorerUtils.BASE_CONF)
                 .scoring(sceneFeatureMap, userFeatureMap, rankItems);