|
@@ -1,374 +0,0 @@
|
|
|
-package com.tzld.piaoquan.recommend.server.service.rank.strategy;
|
|
|
-
|
|
|
-
|
|
|
-import com.alibaba.fastjson.JSONObject;
|
|
|
-import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
-import com.google.common.reflect.TypeToken;
|
|
|
-import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
-import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.RankParam;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.RankService;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorUserFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.recall.strategy.*;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.JSONUtils;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.collections4.CollectionUtils;
|
|
|
-import org.apache.commons.lang3.math.NumberUtils;
|
|
|
-import org.springframework.data.redis.connection.RedisConnectionFactory;
|
|
|
-import org.springframework.data.redis.connection.RedisStandaloneConfiguration;
|
|
|
-import org.springframework.data.redis.connection.jedis.JedisConnectionFactory;
|
|
|
-import org.springframework.data.redis.core.RedisTemplate;
|
|
|
-import org.springframework.data.redis.serializer.StringRedisSerializer;
|
|
|
-import org.springframework.stereotype.Service;
|
|
|
-
|
|
|
-import java.text.SimpleDateFormat;
|
|
|
-import java.util.*;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-/**
|
|
|
- * @author zhangbo
|
|
|
- * @desc 模型的排序实验
|
|
|
- */
|
|
|
-@Service
|
|
|
-@Slf4j
|
|
|
-public class RankStrategy4Rankv2Model extends RankService {
|
|
|
-
|
|
|
- @ApolloJsonValue("${video.model.weightv2:}")
|
|
|
- private Map<String, Double> mergeWeight;
|
|
|
- final private String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
-
|
|
|
-// public Video getTestVideo(Long id, String s){
|
|
|
-// Video a1 = new Video();
|
|
|
-// a1.setVideoId(id);
|
|
|
-// a1.setFlowPool(s);
|
|
|
-// a1.setPushFrom("recall_pool_region_h");
|
|
|
-// return a1;
|
|
|
-// }
|
|
|
- @Override
|
|
|
- public List<Video> mergeAndRankRovRecall(RankParam param) {
|
|
|
-
|
|
|
- //-------------------地域内部融合-------------------
|
|
|
- List<Video> rovRecallRank = new ArrayList<>();
|
|
|
-// rovRecallRank.add(0, getTestVideo(1070462L, ""));
|
|
|
-// rovRecallRank.add(0, getTestVideo(1085062L, ""));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, RegionHRecallStrategy.PUSH_FORM));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, RegionHDupRecallStrategy.PUSH_FORM));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, Region24HRecallStrategy.PUSH_FORM));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, RegionRelative24HRecallStrategy.PUSH_FORM));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, RegionRelative24HDupRecallStrategy.PUSH_FORM));
|
|
|
-
|
|
|
- //-------------------地域内部去重+截断-------------------
|
|
|
- removeDuplicate(rovRecallRank);
|
|
|
- rovRecallRank = rovRecallRank.size() <= param.getSize()
|
|
|
- ? rovRecallRank
|
|
|
- : rovRecallRank.subList(0, param.getSize());
|
|
|
-
|
|
|
- //-------------------地域 sim returnv2 融合-------------------
|
|
|
- rovRecallRank.addAll(extractAndSort(param, SimHotVideoRecallStrategy.PUSH_FORM));
|
|
|
- rovRecallRank.addAll(extractAndSort(param, ReturnVideoRecallStrategy.PUSH_FORM));
|
|
|
- //-------------------地域 sim returnv2 去重-------------------
|
|
|
- removeDuplicate(rovRecallRank);
|
|
|
-
|
|
|
- //-------------------排-------------------
|
|
|
- //-------------------序-------------------
|
|
|
- //-------------------逻-------------------
|
|
|
- //-------------------辑-------------------
|
|
|
- List<String> videoIdKeys = rovRecallRank.stream()
|
|
|
- .map(t -> param.getRankKeyPrefix() + t.getVideoId())
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoScores = this.redisTemplate.opsForValue().multiGet(videoIdKeys);
|
|
|
- if (CollectionUtils.isNotEmpty(videoScores)
|
|
|
- && videoScores.size() == rovRecallRank.size()) {
|
|
|
- for (int i = 0; i < videoScores.size(); i++) {
|
|
|
- rovRecallRank.get(i).setSortScore(NumberUtils.toDouble(videoScores.get(i), 0.0));
|
|
|
- }
|
|
|
- Collections.sort(rovRecallRank, Comparator.comparingDouble(o -> -o.getSortScore()));
|
|
|
- }
|
|
|
-
|
|
|
- //------------------- todo zhangbo 增加排序str ros模型逻辑 合并二者得分-------------------
|
|
|
- List<Video> videosWithModel = model(rovRecallRank, param);
|
|
|
- Map<String, Double> mergeWeight = this.mergeWeight == null? new HashMap<>(): this.mergeWeight;
|
|
|
- double alpha = mergeWeight.getOrDefault("alpha", 1.0D);
|
|
|
- double beta = mergeWeight.getOrDefault("beta", 0.0D);
|
|
|
- double gamma = mergeWeight.getOrDefault("gamma", 0.0D);
|
|
|
- for (Video v : videosWithModel){
|
|
|
- double score = alpha * v.getSortScore() + beta * v.getScoreStr() + gamma * v.getScoreRos();
|
|
|
- if (mergeWeight.containsKey("mul") && mergeWeight.getOrDefault("mul", 0.0D) > 0.5){
|
|
|
- score = alpha * v.getSortScore() + (beta + v.getScoreStr()) * (gamma + v.getScoreRos());
|
|
|
- }
|
|
|
- v.setScoreRegion(v.getSortScore());
|
|
|
- v.score = score;
|
|
|
- v.setSortScore(score);
|
|
|
- }
|
|
|
- videosWithModel.sort(Comparator.comparingDouble(o -> -o.score));
|
|
|
-
|
|
|
- //------------------- 增加日志 -------------------
|
|
|
- int size = 4;
|
|
|
- List<Long> oldRes = rovRecallRank.subList(0, Math.min(rovRecallRank.size(), size)).stream().map(r-> r.getVideoId()).collect(Collectors.toList());
|
|
|
- List<Long> newRes = videosWithModel.subList(0, Math.min(videosWithModel.size(), size)).stream().map(r-> r.getVideoId()).collect(Collectors.toList());
|
|
|
- int diffpos = 0;
|
|
|
- int difftop = 0;
|
|
|
- for (int i=0; i<newRes.size(); ++i){
|
|
|
- if (!oldRes.get(i).equals(newRes.get(i))){
|
|
|
- ++diffpos;
|
|
|
- }
|
|
|
- if (!oldRes.contains(newRes.get(i))){
|
|
|
- ++difftop;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return videosWithModel;
|
|
|
- }
|
|
|
-
|
|
|
- public List<Video> model(List<Video> videos, RankParam param){
|
|
|
- if (videos.isEmpty()){
|
|
|
- return videos;
|
|
|
- }
|
|
|
-
|
|
|
- RedisStandaloneConfiguration redisSC = new RedisStandaloneConfiguration();
|
|
|
- redisSC.setPort(6379);
|
|
|
- redisSC.setPassword("Wqsd@2019");
|
|
|
- redisSC.setHostName("r-bp1pi8wyv6lzvgjy5z.redis.rds.aliyuncs.com");
|
|
|
- RedisConnectionFactory connectionFactory = new JedisConnectionFactory(redisSC);
|
|
|
- RedisTemplate<String, String> redisTemplate = new RedisTemplate<>();
|
|
|
- redisTemplate.setConnectionFactory(connectionFactory);
|
|
|
- redisTemplate.setDefaultSerializer(new StringRedisSerializer());
|
|
|
- redisTemplate.afterPropertiesSet();
|
|
|
-
|
|
|
- // 0: 场景特征处理
|
|
|
- Map<String, String> sceneFeatureMap = this.getSceneFeature(param);
|
|
|
-
|
|
|
- // 1: user特征处理
|
|
|
- Map<String, String> userFeatureMap = new HashMap<>();
|
|
|
- if (param.getMid() != null && !param.getMid().isEmpty()){
|
|
|
- String midKey = "user_info_4video_" + param.getMid();
|
|
|
- String userFeatureStr = redisTemplate.opsForValue().get(midKey);
|
|
|
- if (userFeatureStr != null){
|
|
|
- try{
|
|
|
- userFeatureMap = JSONUtils.fromJson(userFeatureStr,
|
|
|
- new TypeToken<Map<String, String>>() {},
|
|
|
- userFeatureMap);
|
|
|
- }catch (Exception e){
|
|
|
- log.error(String.format("parse user json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
- }
|
|
|
- }else{
|
|
|
- return videos;
|
|
|
- }
|
|
|
- }
|
|
|
- final Set<String> userFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ));
|
|
|
- Iterator<Map.Entry<String, String>> iterator = userFeatureMap.entrySet().iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iterator.next();
|
|
|
- if (!userFeatureSet.contains(entry.getKey())) {
|
|
|
- iterator.remove();
|
|
|
- }
|
|
|
- }
|
|
|
- Map<String, String> f1 = RankExtractorUserFeature.getOriginFeature(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system"
|
|
|
- ))
|
|
|
- );
|
|
|
- Map<String, String> f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap);
|
|
|
- Map<String, String> f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ))
|
|
|
- );
|
|
|
- f1.putAll(f2);
|
|
|
- f1.putAll(f3);
|
|
|
-
|
|
|
- // 2-1: item特征处理
|
|
|
- final Set<String> itemFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"
|
|
|
- ));
|
|
|
-
|
|
|
- List<RankItem> rankItems = CommonCollectionUtils.toList(videos, RankItem::new);
|
|
|
- List<Long> videoIds = CommonCollectionUtils.toListDistinct(videos, Video::getVideoId);
|
|
|
- List<String> videoFeatureKeys = videoIds.stream().map(r-> "video_info_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoFeatures = redisTemplate.opsForValue().multiGet(videoFeatureKeys);
|
|
|
- if (videoFeatures != null){
|
|
|
- for (int i=0; i<videoFeatures.size(); ++i){
|
|
|
- String vF = videoFeatures.get(i);
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- if (vF == null){
|
|
|
- continue;
|
|
|
- }
|
|
|
- try{
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- Iterator<Map.Entry<String, String>> iteratorIn = vfMap.entrySet().iterator();
|
|
|
- while (iteratorIn.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iteratorIn.next();
|
|
|
- if (!itemFeatureSet.contains(entry.getKey())) {
|
|
|
- iteratorIn.remove();
|
|
|
- }
|
|
|
- }
|
|
|
- Map<String, String> f4 = RankExtractorItemFeature.getItemRateFeature(vfMap);
|
|
|
- Map<String, String> f5 = RankExtractorItemFeature.cntFeatureChange(vfMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"))
|
|
|
- );
|
|
|
- f4.putAll(f5);
|
|
|
- rankItems.get(i).setFeatureMap(f4);
|
|
|
- }catch (Exception e){
|
|
|
- log.error(String.format("parse video json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- // 2-2: item 实时特征处理
|
|
|
- List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
|
|
|
- List<String> rtFeaPart = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
|
|
|
- String hour = new SimpleDateFormat("HH").format(calendar.getTime());
|
|
|
- String rtFeaPart1day = date + hour;
|
|
|
- String rtFeaPart1h = date + hour;
|
|
|
- if (rtFeaPart != null){
|
|
|
- if (rtFeaPart.get(0) != null){
|
|
|
- rtFeaPart1day = rtFeaPart.get(0);
|
|
|
- }
|
|
|
- if (rtFeaPart.get(1) != null){
|
|
|
- rtFeaPart1h = rtFeaPart.get(1);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- List<String> videoRtKeys1 = videoIds.stream().map(r-> "item_rt_fea_1day_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoRtKeys2 = videoIds.stream().map(r-> "item_rt_fea_1h_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- videoRtKeys1.addAll(videoRtKeys2);
|
|
|
- List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys1);
|
|
|
-
|
|
|
-
|
|
|
- if (videoRtFeatures != null){
|
|
|
- int j = 0;
|
|
|
- for (RankItem item: rankItems){
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null){
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try{
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()){
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null){
|
|
|
- continue;
|
|
|
- }
|
|
|
- String [] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1){
|
|
|
- String [] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
- }
|
|
|
- }catch (Exception e){
|
|
|
- log.error(String.format("parse video item_rt_fea_1day_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
- }
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1day);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
- }
|
|
|
- for (RankItem item: rankItems){
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null){
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try{
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()){
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null){
|
|
|
- continue;
|
|
|
- }
|
|
|
- String [] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1){
|
|
|
- String [] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
- }
|
|
|
- }catch (Exception e){
|
|
|
- log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}", this.CLASS_NAME, e));
|
|
|
- }
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1h);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
- List<RankItem> rovRecallScore = ScorerUtils.getScorerPipeline(ScorerUtils.BASE_CONF)
|
|
|
- .scoring(sceneFeatureMap, userFeatureMap, rankItems);
|
|
|
- return CommonCollectionUtils.toList(rovRecallScore, i -> {
|
|
|
- // hard code 将排序分数 赋值给video的sortScore
|
|
|
- Video v = i.getVideo();
|
|
|
- v.setScoreStr(i.getScoreStr());
|
|
|
- v.setScoreRos(i.getScoreRos());
|
|
|
- return v;
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, String> getSceneFeature(RankParam param) {
|
|
|
- Map<String, String> sceneFeatureMap = new HashMap<>();
|
|
|
- String provinceCn = param.getProvince();
|
|
|
- provinceCn = provinceCn.replaceAll("省$", "");
|
|
|
- sceneFeatureMap.put("ctx_region", provinceCn);
|
|
|
- String city = param.getCity();
|
|
|
- if ("台北市".equals(city) |
|
|
|
- "高雄市".equals(city) |
|
|
|
- "台中市".equals(city) |
|
|
|
- "桃园市".equals(city) |
|
|
|
- "新北市".equals(city) |
|
|
|
- "台南市".equals(city) |
|
|
|
- "基隆市".equals(city) |
|
|
|
- "吉林市".equals(city) |
|
|
|
- "新竹市".equals(city) |
|
|
|
- "嘉义市".equals(city)
|
|
|
- ){
|
|
|
- }else{
|
|
|
- city = city.replaceAll("市$", "");
|
|
|
- }
|
|
|
- sceneFeatureMap.put("ctx_city", city);
|
|
|
-
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- sceneFeatureMap.put("ctx_week", (calendar.get(Calendar.DAY_OF_WEEK) + 6) % 7 + "");
|
|
|
- sceneFeatureMap.put("ctx_hour", new SimpleDateFormat("HH").format(calendar.getTime()));
|
|
|
-
|
|
|
- return sceneFeatureMap;
|
|
|
- }
|
|
|
-
|
|
|
- public static void main(String[] args) {
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- calendar.set(Calendar.YEAR, 2022);
|
|
|
- calendar.set(Calendar.MONTH, 0); // January is 0
|
|
|
- calendar.set(Calendar.DAY_OF_MONTH, 1);
|
|
|
- calendar.set(Calendar.HOUR_OF_DAY, 0);
|
|
|
- calendar.set(Calendar.MINUTE, 12);
|
|
|
- calendar.set(Calendar.SECOND, 30);
|
|
|
- System.out.println(new SimpleDateFormat("HH").format(calendar.getTime()));
|
|
|
-
|
|
|
- String provinceCn = "吉林省2";
|
|
|
- provinceCn = provinceCn.replaceAll("省$", "");
|
|
|
- System.out.println(provinceCn);
|
|
|
- }
|
|
|
-
|
|
|
-}
|