|
@@ -1,237 +0,0 @@
|
|
|
-package com.tzld.piaoquan.recommend.server.service.recall.strategy;
|
|
|
-
|
|
|
-import com.google.common.reflect.TypeToken;
|
|
|
-import com.tzld.piaoquan.recommend.server.model.Video;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.filter.FilterParam;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.filter.FilterResult;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.filter.RegionFilterService;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.recall.FilterParamFactory;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.recall.RecallParam;
|
|
|
-import com.tzld.piaoquan.recommend.server.service.recall.RecallStrategy;
|
|
|
-import com.tzld.piaoquan.recommend.server.util.JSONUtils;
|
|
|
-import lombok.extern.slf4j.Slf4j;
|
|
|
-import org.apache.commons.collections4.CollectionUtils;
|
|
|
-import org.springframework.beans.factory.annotation.Autowired;
|
|
|
-import org.springframework.beans.factory.annotation.Qualifier;
|
|
|
-import org.springframework.data.redis.core.RedisTemplate;
|
|
|
-import org.springframework.stereotype.Component;
|
|
|
-
|
|
|
-import java.util.*;
|
|
|
-import java.util.stream.Collectors;
|
|
|
-
|
|
|
-/**
|
|
|
- * @author zhangbo
|
|
|
- */
|
|
|
-@Component
|
|
|
-@Slf4j
|
|
|
-public class TitleTagRecallStrategyV1 implements RecallStrategy {
|
|
|
- private final String CLASS_NAME = this.getClass().getSimpleName();
|
|
|
- @Autowired
|
|
|
- private RegionFilterService filterService;
|
|
|
- @Autowired
|
|
|
- @Qualifier("redisTemplate")
|
|
|
- public RedisTemplate<String, String> redisTemplate;
|
|
|
- @Override
|
|
|
- public List<Video> recall(RecallParam param) {
|
|
|
- long t0 = System.currentTimeMillis();
|
|
|
- List<Video> result = new ArrayList<>();
|
|
|
- // 1 获取头部vid,请求redis得到tag。
|
|
|
- Long headVid = param.getVideoId();
|
|
|
- String key1 = "redis:vid_title_tags:" + headVid;
|
|
|
- String value1 = redisTemplate.opsForValue().get(key1);
|
|
|
- if (value1 == null || value1.isEmpty()){
|
|
|
- return result;
|
|
|
- }
|
|
|
- Map<String, String> vfMap = new HashMap<>();
|
|
|
- vfMap = JSONUtils.fromJson(value1, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- String tags = vfMap.getOrDefault("tags","");
|
|
|
- if (tags.isEmpty()){
|
|
|
- return result;
|
|
|
- }
|
|
|
- String title = vfMap.getOrDefault("title","");
|
|
|
- // 2 通过tags请求redis,得到list。
|
|
|
- List<String> key2 = Arrays.stream(tags.split(",")).map(r -> "redis:tag2vids:"+r).collect(Collectors.toList());
|
|
|
- List<String> value2 = redisTemplate.opsForValue().multiGet(key2);
|
|
|
- if (value2 == null || value2.isEmpty()){
|
|
|
- return result;
|
|
|
- }
|
|
|
- // 3 合并多个tag的结果。
|
|
|
- Map<Long, VideoTmp> vid2Info = new HashMap<>();
|
|
|
- for (String v2: value2){
|
|
|
- if (v2 == null || v2.isEmpty()){
|
|
|
- continue;
|
|
|
- }
|
|
|
- vfMap = new HashMap<>();
|
|
|
- vfMap = JSONUtils.fromJson(v2, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- List<Long> vids = new ArrayList<>();
|
|
|
- List<Double> scores = new ArrayList<>();
|
|
|
- try{
|
|
|
- vids = Arrays.stream(vfMap.getOrDefault("videoid_arr", "").split(","))
|
|
|
- .filter(s -> !s.trim().isEmpty() && s.matches("-?\\d+"))
|
|
|
- .map(Long::valueOf).limit(20).collect(Collectors.toList());
|
|
|
- scores = Arrays.stream(vfMap.getOrDefault("score_arr", "").split(","))
|
|
|
- .map(Double::valueOf).limit(20).collect(Collectors.toList());
|
|
|
- }catch(Exception e){
|
|
|
- log.error(String.format("json parse is wrong in {}, key={}, error={}", CLASS_NAME, v2, e));
|
|
|
- vids = new ArrayList<>();
|
|
|
- scores = new ArrayList<>();
|
|
|
- }
|
|
|
- if (vids.size() != scores.size() || vids.isEmpty()){
|
|
|
- continue;
|
|
|
- }
|
|
|
- for (int i = 0; i < vids.size(); ++i){
|
|
|
- Long id = vids.get(i);
|
|
|
- if (id.equals(headVid)){
|
|
|
- continue;
|
|
|
- }
|
|
|
- Double score = scores.get(i);
|
|
|
- if (vid2Info.containsKey(id)){
|
|
|
- VideoTmp videoTmp = vid2Info.get(id);
|
|
|
- videoTmp.cnt += 1;
|
|
|
- videoTmp.score += score;
|
|
|
- vid2Info.put(id, videoTmp);
|
|
|
- }else{
|
|
|
- VideoTmp videoTmp = new VideoTmp();
|
|
|
- videoTmp.id = id;
|
|
|
- videoTmp.cnt = 1;
|
|
|
- videoTmp.score = score;
|
|
|
- vid2Info.put(id, videoTmp);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- if (vid2Info.isEmpty()){
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
- List<Map.Entry<Long, VideoTmp>> entries = new ArrayList<>(vid2Info.entrySet());
|
|
|
- entries.sort(new Comparator<Map.Entry<Long, VideoTmp>>() {
|
|
|
- @Override
|
|
|
- public int compare(Map.Entry<Long, VideoTmp> o1, Map.Entry<Long, VideoTmp> o2) {
|
|
|
- // 先比较cnt,如果cnt相同则比较score,如果score也相同则比较id
|
|
|
- int cmpCnt = Integer.compare(o2.getValue().cnt, o1.getValue().cnt); // 从大到小
|
|
|
- if (cmpCnt != 0) {
|
|
|
- return cmpCnt;
|
|
|
- }
|
|
|
- int cmpScore = Double.compare(o2.getValue().score, o1.getValue().score); // 从大到小
|
|
|
- if (cmpScore != 0) {
|
|
|
- return cmpScore;
|
|
|
- }
|
|
|
- return Long.compare(o2.getKey(), o1.getKey()); // 从大到小
|
|
|
- }
|
|
|
- });
|
|
|
- // 现在entries是按照cnt、score、id从大到小排序的,提取key到List<Long>
|
|
|
- List<Long> sortedKeys = new ArrayList<>();
|
|
|
- for (Map.Entry<Long, VideoTmp> entry : entries) {
|
|
|
- sortedKeys.add(entry.getKey());
|
|
|
- }
|
|
|
-
|
|
|
- // 4 集体走title匹配过滤,推荐过滤。
|
|
|
- List<Long> deleteKey = new ArrayList<>();
|
|
|
- if (!title.isEmpty()){
|
|
|
- List<String> key3 = sortedKeys.stream().map(r -> "redis:vid_title_tags:"+r).collect(Collectors.toList());
|
|
|
- List<String> value3 = redisTemplate.opsForValue().multiGet(key3);
|
|
|
- if (value3 != null && !value3.isEmpty()){
|
|
|
- int j = 0;
|
|
|
- for (String v3 : value3){
|
|
|
- vfMap = new HashMap<>();
|
|
|
- vfMap = JSONUtils.fromJson(v3, new TypeToken<Map<String, String>>() {}, vfMap);
|
|
|
- String titleTmp = vfMap.getOrDefault("title","");
|
|
|
- if (!titleTmp.isEmpty() && calculateStringSimilarity(title, titleTmp) >= 0.9){
|
|
|
- deleteKey.add(sortedKeys.get(j));
|
|
|
- }
|
|
|
- j++;
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- sortedKeys.removeIf(deleteKey::contains);
|
|
|
- if (sortedKeys.isEmpty()){
|
|
|
- return result;
|
|
|
- }
|
|
|
-
|
|
|
- FilterParam filterParam = FilterParamFactory.create(param, sortedKeys);
|
|
|
- FilterResult filterResult = filterService.filter(filterParam);
|
|
|
- List<Video> videosResult = new ArrayList<>();
|
|
|
- if (filterResult != null && CollectionUtils.isNotEmpty(filterResult.getVideoIds())) {
|
|
|
- filterResult.getVideoIds().forEach(vid -> {
|
|
|
- Video video = new Video();
|
|
|
- video.setVideoId(vid);
|
|
|
- video.setAbCode(param.getAbCode());
|
|
|
- double score = 0.0;
|
|
|
- if (vid2Info.containsKey(vid)){
|
|
|
- VideoTmp videoTmp = vid2Info.get(vid);
|
|
|
- score = videoTmp.score / videoTmp.cnt;
|
|
|
- }
|
|
|
- video.setRovScore(score);
|
|
|
- video.setPushFrom(pushFrom());
|
|
|
- videosResult.add(video);
|
|
|
- });
|
|
|
- }
|
|
|
-
|
|
|
- // 5 内部日志打印
|
|
|
-
|
|
|
- // 6 返回结果
|
|
|
- return videosResult;
|
|
|
- }
|
|
|
-
|
|
|
- public static final String PUSH_FORM = "recall_strategy_title_tag";
|
|
|
-
|
|
|
- @Override
|
|
|
- public String pushFrom() {
|
|
|
- return PUSH_FORM;
|
|
|
- }
|
|
|
-
|
|
|
- static class VideoTmp{
|
|
|
- Long id;
|
|
|
- Integer cnt;
|
|
|
- Double score;
|
|
|
- }
|
|
|
-
|
|
|
- public static int calculateLevenshteinDistance(String s1, String s2) {
|
|
|
- if (s1 == null || s2 == null) {
|
|
|
- throw new IllegalArgumentException("Strings must not be null");
|
|
|
- }
|
|
|
-
|
|
|
- int len0 = s1.length() + 1;
|
|
|
- int len1 = s2.length() + 1;
|
|
|
-
|
|
|
- // 创建一个二维数组来保存已经计算过的子问题的解
|
|
|
- int[][] dp = new int[len0][len1];
|
|
|
-
|
|
|
- // 初始化第一行和第一列
|
|
|
- for (int i = 0; i < len0; i++) {
|
|
|
- dp[i][0] = i;
|
|
|
- }
|
|
|
- for (int j = 0; j < len1; j++) {
|
|
|
- dp[0][j] = j;
|
|
|
- }
|
|
|
-
|
|
|
- // 动态规划计算Levenshtein距离
|
|
|
- for (int i = 1; i < len0; i++) {
|
|
|
- for (int j = 1; j < len1; j++) {
|
|
|
- int cost = (s1.charAt(i - 1) == s2.charAt(j - 1)) ? 0 : 1;
|
|
|
- dp[i][j] = Math.min(
|
|
|
- Math.min(dp[i - 1][j] + 1, dp[i][j - 1] + 1),
|
|
|
- dp[i - 1][j - 1] + cost
|
|
|
- );
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- return dp[len0 - 1][len1 - 1];
|
|
|
- }
|
|
|
-
|
|
|
- // 计算字符串匹配度(0到1之间)
|
|
|
- public static double calculateStringSimilarity(String s1, String s2) {
|
|
|
- int distance = calculateLevenshteinDistance(s1, s2);
|
|
|
- int maxLength = Math.max(s1.length(), s2.length());
|
|
|
- // 如果maxLength为0,则两个字符串都是空的,返回1表示完全匹配
|
|
|
- return maxLength == 0 ? 1.0 : 1.0 - ((double) distance / maxLength);
|
|
|
- }
|
|
|
-
|
|
|
- public static void main(String[] args) {
|
|
|
- String s1 = "老年人都来看看那";
|
|
|
- String s2 = "老年人都来看看";
|
|
|
- double score = calculateStringSimilarity(s1, s2);
|
|
|
- System.out.println(score);
|
|
|
- }
|
|
|
-
|
|
|
-}
|