|
@@ -1,627 +0,0 @@
|
|
|
-package com.tzld.piaoquan.recommend.server.implement;
|
|
|
-
|
|
|
-
|
|
|
-import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
-import com.google.common.base.Stopwatch;
|
|
|
-import com.google.common.reflect.TypeToken;
|
|
|
-import com.tzld.piaoquan.recommend.server.common.base.RankItem;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.candidiate.Candidate;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.candidiate.CandidateInfo;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.common.User;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.merger.MergeUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.merger.StrategyQueue;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.recaller.BaseRecaller;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.recaller.provider.RedisBackedQueue;
|
|
|
-import com.tzld.piaoquan.recommend.server.framework.utils.RedisSmartClient;
|
|
|
-import com.tzld.piaoquan.recommend.server.gen.recommend.RecommendRequest;
|
|
|
-import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.ExtractorUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorItemFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.rank.extractor.RankExtractorUserFeature;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.score.ScorerUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.CommonCollectionUtils;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.JSONUtils;
|
|
|
-import org.apache.commons.collections4.CollectionUtils;
|
|
|
-import org.slf4j.Logger;
|
|
|
-import org.slf4j.LoggerFactory;
|
|
|
-import org.springframework.beans.factory.annotation.Autowired;
|
|
|
-import org.springframework.beans.factory.annotation.Qualifier;
|
|
|
-import org.springframework.beans.factory.annotation.Value;
|
|
|
-import org.springframework.data.redis.core.RedisTemplate;
|
|
|
-import org.springframework.stereotype.Service;
|
|
|
-
|
|
|
-import javax.annotation.PostConstruct;
|
|
|
-import javax.annotation.Resource;
|
|
|
-import java.text.SimpleDateFormat;
|
|
|
-import java.util.*;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-@Service
|
|
|
-public class TopRecommendPipeline {
|
|
|
-
|
|
|
- private static final Logger log = LoggerFactory.getLogger(TopRecommendPipeline.class);
|
|
|
-
|
|
|
- public static final String MERGE_CONF = "merge_config.conf";
|
|
|
-
|
|
|
- @Value("${recommend.recall.num:500}")
|
|
|
- private int recallNum;
|
|
|
- @ApolloJsonValue("${rank.score.merge.weightv547:}")
|
|
|
- private Map<String, Double> mergeWeight;
|
|
|
- @Resource
|
|
|
- private RedisSmartClient client;
|
|
|
- @Resource
|
|
|
- private RedisTemplate<String, String> redisTemplate;
|
|
|
-
|
|
|
- @Qualifier("featureRedisTemplate")
|
|
|
- @Autowired
|
|
|
- private RedisTemplate<String, String> featureRedisTemplate;
|
|
|
-
|
|
|
- private RedisBackedQueue queueProvider;
|
|
|
-
|
|
|
- @PostConstruct
|
|
|
- public void init() {
|
|
|
- queueProvider = new RedisBackedQueue(client, 15 * 60 * 1000L);
|
|
|
- mergeWeight = mergeWeight == null ? new HashMap<>() : mergeWeight;
|
|
|
- }
|
|
|
-
|
|
|
- public List<Video> feeds(final RecommendRequest requestData,
|
|
|
- final int requestIndex,
|
|
|
- final User userInfo, Boolean logPrint,
|
|
|
- Map<String, String> timeLogMap) {
|
|
|
- // Step 1: Attention extraction
|
|
|
- Stopwatch stopwatch = Stopwatch.createStarted();
|
|
|
- stopwatch.reset().start();
|
|
|
- timeLogMap.put("uid", userInfo.getUid());
|
|
|
- timeLogMap.put("mid", userInfo.getMid());
|
|
|
- timeLogMap.put("requestId", requestData.getRequestId());
|
|
|
-
|
|
|
- List<RankItem> rankItems = feedByRec(requestData, requestIndex, userInfo, logPrint, timeLogMap);
|
|
|
- timeLogMap.put("feedByRec", stopwatch.elapsed().toMillis() + "");
|
|
|
- if (rankItems == null || rankItems.isEmpty()) {
|
|
|
- return new ArrayList<>();
|
|
|
- }
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, feeds rankItems = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(rankItems));
|
|
|
- }
|
|
|
- stopwatch.reset().start();
|
|
|
- List<Video> videos = rankItem2Video(rankItems);
|
|
|
- timeLogMap.put("rankItem2Video", stopwatch.elapsed().toMillis() + "");
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, videos = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(videos));
|
|
|
- }
|
|
|
- return videos;
|
|
|
- }
|
|
|
-
|
|
|
- private List<Video> rankItem2Video(List<RankItem> rankItems) {
|
|
|
- List<Video> videos = new ArrayList<>();
|
|
|
- for (RankItem item : rankItems) {
|
|
|
- Video video = new Video();
|
|
|
- video.setVideoId(Long.parseLong(item.getId()));
|
|
|
- video.setPushFrom(item.getQueue());
|
|
|
- video.setScore(item.getScore());
|
|
|
- video.setSortScore(item.getScore());
|
|
|
- video.setScoreStr(item.getScoreStr());
|
|
|
- video.setScoresMap(item.getScoresMap());
|
|
|
-
|
|
|
- Map<String, List<String>> pushFromIndex = new HashMap<>();
|
|
|
- pushFromIndex.put(item.getQueue(), item.getCandidateInfoList().stream()
|
|
|
- .map(CandidateInfo::getCandidateQueueName).collect(Collectors.toList()));
|
|
|
- video.setPushFromIndex(pushFromIndex);
|
|
|
- videos.add(video);
|
|
|
- }
|
|
|
- videos.sort(Comparator.comparing(Video::getScore).reversed());
|
|
|
- return videos;
|
|
|
- }
|
|
|
-
|
|
|
- public List<Double> getStaticData(Map<String, Map<String, Double>> itemRealMap,
|
|
|
- List<String> datehours, String key) {
|
|
|
- List<Double> views = new LinkedList<>();
|
|
|
- Map<String, Double> tmp = itemRealMap.getOrDefault(key, new HashMap<>());
|
|
|
- for (String dh : datehours) {
|
|
|
- views.add(tmp.getOrDefault(dh, 0.0D) +
|
|
|
- (views.isEmpty() ? 0.0 : views.get(views.size() - 1))
|
|
|
- );
|
|
|
- }
|
|
|
- return views;
|
|
|
- }
|
|
|
-
|
|
|
- public Double calScoreWeight(List<Double> data){
|
|
|
- Double up = 0.0;
|
|
|
- Double down = 0.0;
|
|
|
- for (int i=0; i<data.size(); ++i){
|
|
|
- up += 1.0 / (i + 1) * data.get(i);
|
|
|
- down += 1.0 / (i + 1);
|
|
|
- }
|
|
|
- return down > 1E-8? up / down: 0.0;
|
|
|
- }
|
|
|
- public List<Double> getRateData(List<Double> ups, List<Double> downs, Double up, Double down){
|
|
|
- List<Double> data = new LinkedList<>();
|
|
|
- for(int i=0; i<ups.size(); ++i){
|
|
|
- data.add(
|
|
|
- (ups.get(i) + up) / (downs.get(i) + down)
|
|
|
- );
|
|
|
- }
|
|
|
- return data;
|
|
|
- }
|
|
|
-
|
|
|
- public List<RankItem> feedByRec(final RecommendRequest requestData,
|
|
|
- final int requestIndex,
|
|
|
- final User userInfo, Boolean logPrint,
|
|
|
- Map<String, String> timeLogMap) {
|
|
|
- Stopwatch stopwatch = Stopwatch.createStarted();
|
|
|
- // Step 2: create top queue
|
|
|
- stopwatch.reset().start();
|
|
|
- StrategyQueue topQueue = MergeUtils.createTopQueue(MERGE_CONF, "top-queue");
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, topQueue = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(topQueue));
|
|
|
- }
|
|
|
- timeLogMap.put("createTopQueue", stopwatch.elapsed().toMillis() + "");
|
|
|
-
|
|
|
- // Step 3: Candidate
|
|
|
- stopwatch.reset().start();
|
|
|
- Map<String, Candidate> candidates = new HashMap<String, Candidate>();
|
|
|
- topQueue.candidate(candidates, recallNum, userInfo, requestData, 0, 0);
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, candidates = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(candidates));
|
|
|
- }
|
|
|
- timeLogMap.put("topQueue-candidate-cost", stopwatch.elapsed().toMillis() + "");
|
|
|
-
|
|
|
-
|
|
|
- // Step 4: Recalling & Basic Scoring
|
|
|
- stopwatch.reset().start();
|
|
|
- BaseRecaller recaller = new BaseRecaller(queueProvider);
|
|
|
- List<RankItem> items = recaller.recalling(requestData, userInfo, new ArrayList<>(candidates.values()));
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, items = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(items));
|
|
|
- }
|
|
|
- timeLogMap.put("recalling-cost", stopwatch.elapsed().toMillis() + "");
|
|
|
- timeLogMap.put("recalling-size", items == null ? "0" : items.size() + "");
|
|
|
-
|
|
|
- if (CollectionUtils.isEmpty(items)) {
|
|
|
- return new ArrayList<>();
|
|
|
- }
|
|
|
-
|
|
|
- // Step 4: Advance Scoring
|
|
|
- stopwatch.reset().start();
|
|
|
- List<RankItem> rankItemList = videoScoredByFeature(items, requestData);
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, rankItemList = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(rankItemList));
|
|
|
- }
|
|
|
-
|
|
|
- stopwatch.reset().start();
|
|
|
- // Step 5: Merger
|
|
|
- MergeUtils.distributeItemsToMultiQueues(topQueue, rankItemList);
|
|
|
- topQueue.merge(recallNum * 3, userInfo, requestData, requestIndex, 0);
|
|
|
-
|
|
|
- // 多样性融合
|
|
|
- List<RankItem> mergeItems = topQueue.getItems();
|
|
|
- if (CollectionUtils.isEmpty(mergeItems)) {
|
|
|
- return new ArrayList<>();
|
|
|
- }
|
|
|
- duplicate(mergeItems);
|
|
|
-
|
|
|
- timeLogMap.put("mergeItems-cost", stopwatch.elapsed().toMillis() + "");
|
|
|
- timeLogMap.put("mergeItems-size", mergeItems.size() + "");
|
|
|
-
|
|
|
- if (logPrint) {
|
|
|
- log.info("traceId = {}, cost = {}, mergeItems = {}", requestData.getRequestId(),
|
|
|
- stopwatch.elapsed().toMillis(), JSONUtils.toJson(mergeItems));
|
|
|
- }
|
|
|
-// MergeUtils.diversityRerank(mergeItems, SimilarityUtils.getIsSameUserTagOrCategoryFunc(), recallNum, 6, 2);
|
|
|
-
|
|
|
- // Step 6: Global Rank & subList
|
|
|
- return mergeItems;
|
|
|
- }
|
|
|
-
|
|
|
- public Double calScoreWeightNoTimeDecay(List<Double> data) {
|
|
|
- Double up = 0.0;
|
|
|
- Double down = 0.0;
|
|
|
- for (int i = 0; i < data.size(); ++i) {
|
|
|
- up += 1.0 * data.get(i);
|
|
|
- down += 1.0;
|
|
|
- }
|
|
|
- return down > 1E-8 ? up / down : 0.0;
|
|
|
- }
|
|
|
-
|
|
|
- private List<RankItem> videoScoredByFeature(List<RankItem> items, RecommendRequest recommendRequest) {
|
|
|
- // 1 模型分
|
|
|
- List<RankItem> rankItemList = model(items, recommendRequest);
|
|
|
- List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
|
|
|
- List<String> rtFeaPartKeyResult = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
|
|
|
- String hour = new SimpleDateFormat("HH").format(calendar.getTime());
|
|
|
- String rtFeaPart1h = date + hour;
|
|
|
- if (rtFeaPartKeyResult != null) {
|
|
|
- if (rtFeaPartKeyResult.get(1) != null) {
|
|
|
- rtFeaPart1h = rtFeaPartKeyResult.get(1);
|
|
|
- }
|
|
|
- }
|
|
|
- // 2 统计分 3H
|
|
|
- String cur = rtFeaPart1h;
|
|
|
- List<String> datehours = new LinkedList<>(); // 时间是倒叙的
|
|
|
- for (int i = 0; i < 3; ++i) {
|
|
|
- datehours.add(cur);
|
|
|
- cur = ExtractorUtils.subtractHours(cur, 1);
|
|
|
- }
|
|
|
- for (RankItem item : rankItemList) {
|
|
|
- Map<String, String> itemBasicMap = item.getItemBasicFeature();
|
|
|
- Map<String, Map<String, Double>> itemRealMap = item.getItemRealTimeFeature();
|
|
|
- List<Double> views = getStaticData(itemRealMap, datehours, "view_pv_list_1h");
|
|
|
- List<Double> plays = getStaticData(itemRealMap, datehours, "play_pv_list_1h");
|
|
|
- List<Double> shares = getStaticData(itemRealMap, datehours, "share_pv_list_1h");
|
|
|
- List<Double> preturns = getStaticData(itemRealMap, datehours, "p_return_uv_list_1h");
|
|
|
- List<Double> allreturns = getStaticData(itemRealMap, datehours, "return_uv_list_1h");
|
|
|
-
|
|
|
- List<Double> share2return = getRateData(preturns, shares, 1.0, 1000.0);
|
|
|
- Double share2returnScore = calScoreWeightNoTimeDecay(share2return);
|
|
|
- List<Double> view2return = getRateData(preturns, views, 1.0, 1000.0);
|
|
|
- Double view2returnScore = calScoreWeightNoTimeDecay(view2return);
|
|
|
- List<Double> view2play = getRateData(plays, views, 1.0, 1000.0);
|
|
|
- Double view2playScore = calScoreWeightNoTimeDecay(view2play);
|
|
|
- List<Double> play2share = getRateData(shares, plays, 1.0, 1000.0);
|
|
|
- Double play2shareScore = calScoreWeightNoTimeDecay(play2share);
|
|
|
- item.scoresMap.put("share2returnScore", share2returnScore);
|
|
|
- item.scoresMap.put("view2returnScore", view2returnScore);
|
|
|
- item.scoresMap.put("view2playScore", view2playScore);
|
|
|
- item.scoresMap.put("play2shareScore", play2shareScore);
|
|
|
-
|
|
|
- // 全部回流的rov和ros
|
|
|
- List<Double> share2allreturn = getRateData(allreturns, shares, 1.0, 10.0);
|
|
|
- Double share2allreturnScore = calScoreWeightNoTimeDecay(share2allreturn);
|
|
|
- List<Double> view2allreturn = getRateData(allreturns, views, 0.0, 0.0);
|
|
|
- Double view2allreturnScore = calScoreWeightNoTimeDecay(view2allreturn);
|
|
|
- item.scoresMap.put("share2allreturnScore", share2allreturnScore);
|
|
|
- item.scoresMap.put("view2allreturnScore", view2allreturnScore);
|
|
|
-
|
|
|
- // 全部回流
|
|
|
- Double allreturnsScore = calScoreWeightNoTimeDecay(allreturns);
|
|
|
- item.scoresMap.put("allreturnsScore", allreturnsScore);
|
|
|
-
|
|
|
- // 平台回流
|
|
|
- Double preturnsScore = calScoreWeightNoTimeDecay(preturns);
|
|
|
- item.scoresMap.put("preturnsScore", preturnsScore);
|
|
|
-
|
|
|
- // 平台回流ROV
|
|
|
- List<Double> view2PreReturns = getRateData(preturns, views, 0.0, 0.0);
|
|
|
- Double view2PreReturnsScore = calScoreWeightNoTimeDecay(view2PreReturns);
|
|
|
- item.scoresMap.put("view2PreReturnsScore", view2PreReturnsScore);
|
|
|
-
|
|
|
- // 平台回流ROS
|
|
|
- List<Double> share2PreReturns = getRateData(preturns, shares, 1.0, 10.0);
|
|
|
- Double share2PreReturnsScore = calScoreWeightNoTimeDecay(share2PreReturns);
|
|
|
- item.scoresMap.put("share2PreReturnsScore", share2PreReturnsScore);
|
|
|
-
|
|
|
-
|
|
|
- // rov的趋势
|
|
|
- double trendScore = calTrendScore(view2return);
|
|
|
- item.scoresMap.put("trendScore", trendScore);
|
|
|
-
|
|
|
- // 新视频提取
|
|
|
- double newVideoScore = calNewVideoScore(itemBasicMap);
|
|
|
- item.scoresMap.put("newVideoScore", newVideoScore);
|
|
|
-
|
|
|
- }
|
|
|
- // 3 融合公式
|
|
|
-// double a = mergeWeight.getOrDefault("a", 0.1);
|
|
|
-// double b = mergeWeight.getOrDefault("b", 0.0);
|
|
|
-// double c = mergeWeight.getOrDefault("c", 0.000001);
|
|
|
-// double d = mergeWeight.getOrDefault("d", 1.0);
|
|
|
-// double e = mergeWeight.getOrDefault("e", 1.0);
|
|
|
-// double f = mergeWeight.getOrDefault("f", 0.8);
|
|
|
-// double g = mergeWeight.getOrDefault("g", 2.0);
|
|
|
-// double h = mergeWeight.getOrDefault("h", 240.0);
|
|
|
-// double ifAdd = mergeWeight.getOrDefault("ifAdd", 1.0);
|
|
|
- for (RankItem item : rankItemList) {
|
|
|
-// double trendScore = item.scoresMap.getOrDefault("trendScore", 0.0) > 1E-8 ?
|
|
|
-// item.scoresMap.getOrDefault("trendScore", 0.0) : 0.0;
|
|
|
-// double newVideoScore = item.scoresMap.getOrDefault("newVideoScore", 0.0) > 1E-8 ?
|
|
|
-// item.scoresMap.getOrDefault("newVideoScore", 0.0) : 0.0;
|
|
|
-// double strScore = item.getScoreStr();
|
|
|
-// double rosScore = item.scoresMap.getOrDefault("share2returnScore", 0.0);
|
|
|
- double share2allreturnScore = item.scoresMap.getOrDefault("share2allreturnScore", 0.0);
|
|
|
- double view2allreturnScore = item.scoresMap.getOrDefault("view2allreturnScore", 0.0);
|
|
|
-// double preturnsScore = Math.log(1 + item.scoresMap.getOrDefault("preturnsScore", 0.0));
|
|
|
-// Double view2PreReturnsScore = item.scoresMap.getOrDefault("view2PreReturnsScore", 0.0);
|
|
|
-// Double share2PreReturnsScore = item.scoresMap.getOrDefault("share2PreReturnsScore", 0.0);
|
|
|
- // if NaN set 0
|
|
|
- if (Double.isNaN(share2allreturnScore)) {
|
|
|
- share2allreturnScore = 0.0;
|
|
|
- }
|
|
|
- if (Double.isNaN(view2allreturnScore)) {
|
|
|
- view2allreturnScore = 0.0;
|
|
|
- }
|
|
|
- double score = share2allreturnScore + view2allreturnScore;
|
|
|
-// if (ifAdd < 0.5) {
|
|
|
-// score = Math.pow(strScore, a) * Math.pow(rosScore, b) + c * preturnsScore +
|
|
|
-// (newVideoScore > 1E-8 ? d * trendScore * (e + newVideoScore) : 0.0);
|
|
|
-// } else {
|
|
|
-// score = a * strScore + b * rosScore + c * preturnsScore +
|
|
|
-// (newVideoScore > 1E-8 ? d * trendScore * (e + newVideoScore) : 0.0);
|
|
|
-//
|
|
|
-// }
|
|
|
-// double allreturnsScore = item.scoresMap.getOrDefault("allreturnsScore", 0.0);
|
|
|
-// if (allreturnsScore > h) {
|
|
|
-// score += (f * share2allreturnScore + g * view2allreturnScore);
|
|
|
-// }
|
|
|
- // 设置计算好的分数
|
|
|
- item.setScore(score);
|
|
|
- }
|
|
|
- return rankItemList;
|
|
|
- }
|
|
|
-
|
|
|
- private List<RankItem> model(List<RankItem> items, RecommendRequest param) {
|
|
|
- if (items.isEmpty()) {
|
|
|
- return items;
|
|
|
- }
|
|
|
-
|
|
|
- // 0: 场景特征处理
|
|
|
- Map<String, String> sceneFeatureMap = this.getSceneFeature(param);
|
|
|
-
|
|
|
- // 1: user特征处理
|
|
|
- Map<String, String> userFeatureMap = new HashMap<>();
|
|
|
- if (param.getMid() != null && !param.getMid().isEmpty()) {
|
|
|
- String midKey = "user_info_4video_" + param.getMid();
|
|
|
- String userFeatureStr = featureRedisTemplate.opsForValue().get(midKey);
|
|
|
- if (userFeatureStr != null) {
|
|
|
- try {
|
|
|
- userFeatureMap = JSONUtils.fromJson(userFeatureStr,
|
|
|
- new TypeToken<Map<String, String>>() {
|
|
|
- },
|
|
|
- userFeatureMap);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse user json is wrong in {} with {}", this.getClass().getSimpleName(), e));
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- final Set<String> userFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system",
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ));
|
|
|
- Iterator<Map.Entry<String, String>> iterator = userFeatureMap.entrySet().iterator();
|
|
|
- while (iterator.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iterator.next();
|
|
|
- if (!userFeatureSet.contains(entry.getKey())) {
|
|
|
- iterator.remove();
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- Map<String, String> f1 = RankExtractorUserFeature.getOriginFeature(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "machineinfo_brand", "machineinfo_model", "machineinfo_platform", "machineinfo_system"
|
|
|
- ))
|
|
|
- );
|
|
|
- Map<String, String> f2 = RankExtractorUserFeature.getUserRateFeature(userFeatureMap);
|
|
|
- Map<String, String> f3 = RankExtractorUserFeature.cntFeatureChange(userFeatureMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "u_1day_exp_cnt", "u_1day_click_cnt", "u_1day_share_cnt", "u_1day_return_cnt",
|
|
|
- "u_3day_exp_cnt", "u_3day_click_cnt", "u_3day_share_cnt", "u_3day_return_cnt"
|
|
|
- ))
|
|
|
- );
|
|
|
- f1.putAll(f2);
|
|
|
- f1.putAll(f3);
|
|
|
-
|
|
|
- // 2-1: item特征处理
|
|
|
- final Set<String> itemFeatureSet = new HashSet<>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"
|
|
|
- ));
|
|
|
-
|
|
|
- List<String> videoIds = CommonCollectionUtils.toListDistinct(items, RankItem::getId);
|
|
|
- List<String> videoFeatureKeys = videoIds.stream().map(r -> "video_info_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoFeatures = featureRedisTemplate.opsForValue().multiGet(videoFeatureKeys);
|
|
|
- if (videoFeatures != null) {
|
|
|
- for (int i = 0; i < videoFeatures.size(); ++i) {
|
|
|
- String vF = videoFeatures.get(i);
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
- Map<String, String> vfMapCopy = new HashMap<>(vfMap);
|
|
|
- items.get(i).setItemBasicFeature(vfMapCopy);
|
|
|
- Iterator<Map.Entry<String, String>> iteratorIn = vfMap.entrySet().iterator();
|
|
|
- while (iteratorIn.hasNext()) {
|
|
|
- Map.Entry<String, String> entry = iteratorIn.next();
|
|
|
- if (!itemFeatureSet.contains(entry.getKey())) {
|
|
|
- iteratorIn.remove();
|
|
|
- }
|
|
|
- }
|
|
|
- Map<String, String> f4 = RankExtractorItemFeature.getItemRateFeature(vfMap);
|
|
|
- Map<String, String> f5 = RankExtractorItemFeature.cntFeatureChange(vfMap,
|
|
|
- new HashSet<String>(Arrays.asList(
|
|
|
- "total_time", "play_count_total",
|
|
|
- "i_1day_exp_cnt", "i_1day_click_cnt", "i_1day_share_cnt", "i_1day_return_cnt",
|
|
|
- "i_3day_exp_cnt", "i_3day_click_cnt", "i_3day_share_cnt", "i_3day_return_cnt"))
|
|
|
- );
|
|
|
- f4.putAll(f5);
|
|
|
- items.get(i).setFeatureMap(f4);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video json is wrong in {} with {}", this.getClass().getSimpleName(), e));
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- // 2-2: item 实时特征处理
|
|
|
- List<String> rtFeaPartKey = new ArrayList<>(Arrays.asList("item_rt_fea_1day_partition", "item_rt_fea_1h_partition"));
|
|
|
- List<String> rtFeaPartKeyResult = this.redisTemplate.opsForValue().multiGet(rtFeaPartKey);
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- String date = new SimpleDateFormat("yyyyMMdd").format(calendar.getTime());
|
|
|
- String hour = new SimpleDateFormat("HH").format(calendar.getTime());
|
|
|
- String rtFeaPart1day = date + hour;
|
|
|
- String rtFeaPart1h = date + hour;
|
|
|
- if (rtFeaPartKeyResult != null) {
|
|
|
- if (rtFeaPartKeyResult.get(0) != null) {
|
|
|
- rtFeaPart1day = rtFeaPartKeyResult.get(0);
|
|
|
- }
|
|
|
- if (rtFeaPartKeyResult.get(1) != null) {
|
|
|
- rtFeaPart1h = rtFeaPartKeyResult.get(1);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- List<String> videoRtKeys1 = videoIds.stream().map(r -> "item_rt_fea_1day_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- List<String> videoRtKeys2 = videoIds.stream().map(r -> "item_rt_fea_1h_" + r)
|
|
|
- .collect(Collectors.toList());
|
|
|
- videoRtKeys1.addAll(videoRtKeys2);
|
|
|
- List<String> videoRtFeatures = this.redisTemplate.opsForValue().multiGet(videoRtKeys1);
|
|
|
-
|
|
|
-
|
|
|
- if (videoRtFeatures != null) {
|
|
|
- int j = 0;
|
|
|
- for (RankItem item : items) {
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- String[] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1) {
|
|
|
- String[] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
- }
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video item_rt_fea_1day_ json is wrong in {} with {}",
|
|
|
- this.getClass().getSimpleName(), e));
|
|
|
- }
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1day);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
- }
|
|
|
- for (RankItem item : items) {
|
|
|
- String vF = videoRtFeatures.get(j);
|
|
|
- ++j;
|
|
|
- if (vF == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- Map<String, Map<String, Double>> vfMapNew = new HashMap<>();
|
|
|
- try {
|
|
|
- vfMap = JSONUtils.fromJson(vF, new TypeToken<Map<String, String>>() {
|
|
|
- }, vfMap);
|
|
|
-
|
|
|
- for (Map.Entry<String, String> entry : vfMap.entrySet()) {
|
|
|
- String value = entry.getValue();
|
|
|
- if (value == null) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- String[] var1 = value.split(",");
|
|
|
- Map<String, Double> tmp = new HashMap<>();
|
|
|
- for (String var2 : var1) {
|
|
|
- String[] var3 = var2.split(":");
|
|
|
- tmp.put(var3[0], Double.valueOf(var3[1]));
|
|
|
- }
|
|
|
- vfMapNew.put(entry.getKey(), tmp);
|
|
|
- }
|
|
|
- item.setItemRealTimeFeature(vfMapNew);
|
|
|
- } catch (Exception e) {
|
|
|
- log.error(String.format("parse video item_rt_fea_1h_ json is wrong in {} with {}",
|
|
|
- this.getClass().getSimpleName(), e));
|
|
|
- }
|
|
|
- Map<String, String> f8 = RankExtractorItemFeature.getItemRealtimeRate(vfMapNew, rtFeaPart1h);
|
|
|
- item.getFeatureMap().putAll(f8);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
- List<RankItem> rovRecallScore = ScorerUtils.getScorerPipeline(ScorerUtils.BASE_CONF)
|
|
|
- .scoring(sceneFeatureMap, userFeatureMap, items);
|
|
|
- return rovRecallScore;
|
|
|
- }
|
|
|
-
|
|
|
- public double calNewVideoScore(Map<String, String> itemBasicMap) {
|
|
|
- double existenceDays = Double.valueOf(itemBasicMap.getOrDefault("existence_days", "30"));
|
|
|
- if (existenceDays > 5) {
|
|
|
- return 0.0;
|
|
|
- }
|
|
|
- double score = 1.0 / (existenceDays + 10.0);
|
|
|
- return score;
|
|
|
- }
|
|
|
-
|
|
|
- public double calTrendScore(List<Double> data) {
|
|
|
- double sum = 0.0;
|
|
|
- int size = data.size();
|
|
|
- for (int i = 0; i < size - 4; ++i) {
|
|
|
- sum += data.get(i) - data.get(i + 4);
|
|
|
- }
|
|
|
- if (sum * 10 > 0.6) {
|
|
|
- sum = 0.6;
|
|
|
- } else {
|
|
|
- sum = sum * 10;
|
|
|
- }
|
|
|
- if (sum > 0) {
|
|
|
- // 为了打断点
|
|
|
- sum = sum;
|
|
|
- }
|
|
|
- return sum;
|
|
|
- }
|
|
|
-
|
|
|
- private void duplicate(List<RankItem> items) {
|
|
|
- Set<String> ids = new HashSet<>();
|
|
|
- List<RankItem> result = new ArrayList<>();
|
|
|
- for (RankItem item : items) {
|
|
|
- if (ids.contains(item.getId())) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- ids.add(item.getId());
|
|
|
- result.add(item);
|
|
|
- }
|
|
|
- items.clear();
|
|
|
- items.addAll(result);
|
|
|
- }
|
|
|
-
|
|
|
- private Map<String, String> getSceneFeature(RecommendRequest param) {
|
|
|
- Map<String, String> sceneFeatureMap = new HashMap<>();
|
|
|
- String provinceCn = param.getProvince();
|
|
|
- provinceCn = provinceCn.replaceAll("省$", "");
|
|
|
- sceneFeatureMap.put("ctx_region", provinceCn);
|
|
|
- String city = param.getCity();
|
|
|
- if ("台北市".equals(city) |
|
|
|
- "高雄市".equals(city) |
|
|
|
- "台中市".equals(city) |
|
|
|
- "桃园市".equals(city) |
|
|
|
- "新北市".equals(city) |
|
|
|
- "台南市".equals(city) |
|
|
|
- "基隆市".equals(city) |
|
|
|
- "吉林市".equals(city) |
|
|
|
- "新竹市".equals(city) |
|
|
|
- "嘉义市".equals(city)
|
|
|
- ){
|
|
|
- }else{
|
|
|
- city = city.replaceAll("市$", "");
|
|
|
- }
|
|
|
- sceneFeatureMap.put("ctx_city", city);
|
|
|
-
|
|
|
- Calendar calendar = Calendar.getInstance();
|
|
|
- sceneFeatureMap.put("ctx_week", (calendar.get(Calendar.DAY_OF_WEEK) + 6) % 7 + "");
|
|
|
- sceneFeatureMap.put("ctx_hour", new SimpleDateFormat("HH").format(calendar.getTime()));
|
|
|
-
|
|
|
- return sceneFeatureMap;
|
|
|
- }
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-
|
|
|
-}
|