jiandong.liu před 23 hodinami
rodič
revize
017b4e96d7

+ 42 - 4
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/util/ExtractorUtils.java

@@ -64,18 +64,45 @@ public class ExtractorUtils {
     }
 
     public static Double[] funcC34567ForTagsNew(String tags, String title) {
+        return funcC34567ForTagsNewWithCache(tags, title, null); // null titleWords: the title is segmented on demand
+    }
+
+    /**
+     * 优化版本:支持缓存 title 和 tag 的分词结果
+     * @param tags 标签字符串,逗号分隔
+     * @param title 标题
+     * @param titleWords title 的分词结果缓存,如果为 null 则现场分词
+     * @param tagWordsCache tag 分词结果缓存(请求级别),可以为 null
+     * @return [匹配数量, 最大相似度, 平均相似度]
+     */
+    public static Double[] funcC34567ForTagsNewWithCache(String tags, String title, List<String> titleWords, Map<String, List<String>> tagWordsCache) {
+        if (tags == null || tags.isEmpty() || title == null || title.isEmpty()) {
+            return EMPTY_RESULT;
+        }
+
         String[] tagsList = tags.split(",");
         int d1 = 0;
-        List<String> d2 = new ArrayList<>();
         double d3 = 0.0;
         double d4 = 0.0;
 
+        // 只分词一次 title,复用结果
+        List<String> cachedTitleWords = (titleWords != null) ? titleWords : SimilarityUtils.segment(title);
+
         for (String tag : tagsList) {
+            if (tag == null || tag.isEmpty()) {
+                continue;
+            }
             if (title.contains(tag)) {
                 d1++;
-                d2.add(tag);
             }
-            double score = SimilarityUtils.word2VecSimilarity(tag, title);
+            // 使用请求级别的 tag 分词缓存
+            List<String> tagWords;
+            if (tagWordsCache != null) {
+                tagWords = tagWordsCache.computeIfAbsent(tag, SimilarityUtils::segment);
+            } else {
+                tagWords = SimilarityUtils.segment(tag);
+            }
+            float score = SimilarityUtils.word2VecSimilarityWithWords(tagWords, cachedTitleWords);
             if (score > d3) {
                 d3 = score;
             }
@@ -84,10 +111,21 @@ public class ExtractorUtils {
 
         d4 = (tagsList.length > 0) ? d4 / tagsList.length : d4;
 
-        // 使用数组来返回多个值
         return new Double[]{(double) d1, d3, d4};
     }
 
+    /**
+     * Backward-compatible overload: delegates to the 4-argument variant with no tag-word cache (null).
+     */
+    public static Double[] funcC34567ForTagsNewWithCache(String tags, String title, List<String> titleWords) {
+        return funcC34567ForTagsNewWithCache(tags, title, titleWords, null);
+    }
+
+    /**
+     * Shared all-zero result, reused to avoid re-allocating the array. NOTE(review): the array is mutable and is returned by reference; confirm no caller writes into it.
+     */
+    private static final Double[] EMPTY_RESULT = new Double[]{0.0, 0.0, 0.0};
+
     public static Double calDiv(double a, double b) {
         if (a == 0 || b == 0) {
             return 0D;

+ 40 - 1
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/util/SimilarityUtils.java

@@ -52,12 +52,51 @@ public final class SimilarityUtils {
         }
     }
 
-
+    /**
+     * Original entry point, kept for compatibility: runs Segment.getWords on both strings and scores the two word lists.
+     */
     public static float word2VecSimilarity(String str1, String str2) {
         List<String> words1 = Segment.getWords(str1);
         List<String> words2 = Segment.getWords(str2);
         return vec.sentenceSimilarity(words1, words2);
     }
 
+    /**
+     * Optimized variant: scores two already-segmented word lists, so callers can reuse segmentation results.
+     * @param words1 pre-segmented words of the first text
+     * @param words2 pre-segmented words of the second text
+     * @return similarity score from {@code vec.sentenceSimilarity}; 0.0f if either list is null or empty
+     */
+    public static float word2VecSimilarityWithWords(List<String> words1, List<String> words2) {
+        if (words1 == null || words1.isEmpty() || words2 == null || words2.isEmpty()) {
+            return 0.0f;
+        }
+        return vec.sentenceSimilarity(words1, words2);
+    }
+
+    /**
+     * Optimized variant for comparing one text against many: reuses the first text's segmentation when supplied.
+     * @param str1 first text; only segmented here when {@code str1Words} is null
+     * @param str2 second text; segmented on every call
+     * @param str1Words pre-segmented words of {@code str1}, or null to segment {@code str1} on the spot
+     * @return similarity score from {@code vec.sentenceSimilarity}
+     */
+    public static float word2VecSimilarityWithCache(String str1, String str2, List<String> str1Words) {
+        List<String> words1 = (str1Words != null) ? str1Words : Segment.getWords(str1);
+        List<String> words2 = Segment.getWords(str2);
+        return vec.sentenceSimilarity(words1, words2);
+    }
+
+    /**
+     * Segmentation helper, exposed so that callers can cache the result.
+     * @param text text to segment
+     * @return {@code Segment.getWords(text)}, or an immutable empty list when {@code text} is null or empty
+     */
+    public static List<String> segment(String text) {
+        if (text == null || text.isEmpty()) {
+            return java.util.Collections.emptyList();
+        }
+        return Segment.getWords(text);
+    }
 
 }

+ 133 - 40
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/strategy/RankStrategyBy688.java

@@ -26,6 +26,8 @@ import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.InputStreamReader;
+import java.math.BigDecimal;
+import java.math.RoundingMode;
 import java.util.*;
 import java.util.concurrent.ConcurrentHashMap;
 import java.util.concurrent.CountDownLatch;
@@ -39,6 +41,12 @@ import static com.tzld.piaoquan.ad.engine.commons.math.Const.*;
 @Component
 public class RankStrategyBy688 extends RankStrategyBasic {
 
+    /**
+     * Shared empty-map defaults, to avoid allocating a new HashMap per lookup. NOTE(review): these are immutable, unlike the HashMaps they replace; confirm no consumer writes into a default.
+     */
+    private static final Map<String, String> EMPTY_STRING_MAP = Collections.emptyMap();
+    private static final Map<String, Map<String, String>> EMPTY_NESTED_MAP = Collections.emptyMap();
+
     private Map<String, double[]> bucketsMap = new HashMap<>();
 
     private Map<String, Double> bucketsLen = new HashMap<>();
@@ -105,22 +113,22 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         Map<String, String> reqFeature = this.getReqFeature(scoreParam, request);
 
         Map<String, String> userFeatureMap = new HashMap<>();
-        Map<String, String> c1Feature = userFeature.getOrDefault("alg_mid_feature_ad_action", new HashMap<>());
+        Map<String, String> c1Feature = userFeature.getOrDefault("alg_mid_feature_ad_action", EMPTY_STRING_MAP);
         List<TupleMapEntry<Tuple5>> midActionList = this.handleC1Feature(c1Feature, userFeatureMap);
 
         Map<String, Double> midTimeDiffMap = this.parseC1FeatureListToTimeDiffMap(midActionList, ts);
         Map<String, Double> actionStaticMap = this.parseC1FeatureListToActionStaticMap(midActionList);
 
-        Map<String, String> d2Feature = videoFeature.getOrDefault("alg_cid_feature_vid_cf_rank", new HashMap<>());
-        Map<String, String> d3Feature = videoFeature.getOrDefault("alg_vid_feature_basic_info", new HashMap<>());
+        Map<String, String> d2Feature = videoFeature.getOrDefault("alg_cid_feature_vid_cf_rank", EMPTY_STRING_MAP);
+        Map<String, String> d3Feature = videoFeature.getOrDefault("alg_vid_feature_basic_info", EMPTY_STRING_MAP);
 
         Map<String, Map<String, Double>> vidRankMaps = this.parseD2FeatureMap(d2Feature);
 
-        Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", new HashMap<>());
-        Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", new HashMap<>());
+        Map<String, String> e1Feature = userFeature.getOrDefault("alg_mid_feature_return_tags", EMPTY_STRING_MAP);
+        Map<String, String> e2Feature = userFeature.getOrDefault("alg_mid_feature_share_tags", EMPTY_STRING_MAP);
 
-        Map<String, String> g1Feature = userFeature.getOrDefault("mid_return_video_cate", new HashMap<>());
-        Map<String, String> g2Feature = userFeature.getOrDefault("mid_share_video_cate", new HashMap<>());
+        Map<String, String> g1Feature = userFeature.getOrDefault("mid_return_video_cate", EMPTY_STRING_MAP);
+        Map<String, String> g2Feature = userFeature.getOrDefault("mid_share_video_cate", EMPTY_STRING_MAP);
 
 
         userFeatureMap.put("brand", reqFeature.getOrDefault("brand", ""));
@@ -185,12 +193,12 @@ public class RankStrategyBy688 extends RankStrategyBasic {
                     setGuaranteeWeight(map, dto.getAdVerId(), adRankItem.getExt(), isGuaranteedFlow, reqFeature);
                     String cidStr = dto.getCreativeId().toString();
                     Map<String, String> cidFeatureMap = adRankItem.getFeatureMap();
-                    Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, new HashMap<>());
-                    Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
+                    Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, EMPTY_NESTED_MAP);
+                    Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", EMPTY_STRING_MAP);
 
-                    Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(dto.getAdVerId(), new HashMap<>());
-                    Map<String, Map<String, String>> skuFeature = allSkuFeature.getOrDefault(String.valueOf(dto.getSkuId()), new HashMap<>());
-                    Map<String, String> d1Feature = cidFeature.getOrDefault("alg_cid_feature_vid_cf", new HashMap<>());
+                    Map<String, Map<String, String>> adVerFeature = allAdVerFeature.getOrDefault(dto.getAdVerId(), EMPTY_NESTED_MAP);
+                    Map<String, Map<String, String>> skuFeature = allSkuFeature.getOrDefault(String.valueOf(dto.getSkuId()), EMPTY_NESTED_MAP);
+                    Map<String, String> d1Feature = cidFeature.getOrDefault("alg_cid_feature_vid_cf", EMPTY_STRING_MAP);
 
                     this.handleB1Feature(b1Feature, cidFeatureMap, cidStr);
                     this.handleB2ToB5AndB8ToB9Feature(cidFeature, adVerFeature, cidFeatureMap);
@@ -235,15 +243,17 @@ public class RankStrategyBy688 extends RankStrategyBasic {
 
         long time2 = System.currentTimeMillis();
         // feature3
+        // 请求级别的 tag 分词缓存,所有广告共享(同一用户的 tags 相同)
+        Map<String, List<String>> tagWordsCache = new ConcurrentHashMap<>();
         CountDownLatch cdl2 = new CountDownLatch(adRankItems.size() * 2);
         for (AdRankItem item : adRankItems) {
             String cidStr = String.valueOf(item.getAdId());
-            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, new HashMap<>());
-            Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", new HashMap<>());
+            Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(cidStr, EMPTY_NESTED_MAP);
+            Map<String, String> b1Feature = cidFeature.getOrDefault("alg_cid_feature_basic_info", EMPTY_STRING_MAP);
             String title = b1Feature.getOrDefault("cidtitle", "");
             ThreadPoolFactory.defaultPool().submit(() -> {
                 try {
-                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap(), scoreParam);
+                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap(), scoreParam, tagWordsCache);
                 } finally {
                     cdl2.countDown();
                 }
@@ -299,8 +309,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             double calibratedScore = originalScore / (originalScore + (1 - originalScore) / negSampleRate);
             // 该创意尚未在模型中训练,打分不可靠
             if (CollectionUtils.isNotEmpty(DnnCidDataHelper.getCidSet()) && !DnnCidDataHelper.getCidSet().contains(item.getAdId())) {
-                Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), new HashMap<>());
-                Map<String, String> b3Feature = cidFeature.getOrDefault("alg_cid_feature_cid_action", new HashMap<>());
+                Map<String, Map<String, String>> cidFeature = allCidFeature.getOrDefault(String.valueOf(item.getAdId()), EMPTY_NESTED_MAP);
+                Map<String, String> b3Feature = cidFeature.getOrDefault("alg_cid_feature_cid_action", EMPTY_STRING_MAP);
                 double view = Double.parseDouble(b3Feature.getOrDefault("ad_view_14d", "0"));
                 double conver = Double.parseDouble(b3Feature.getOrDefault("ad_conversion_14d", "0"));
                 double smoothCxr = NumUtil.divSmoothV1(conver, view, 1.64);
@@ -440,12 +450,12 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     }
 
     private void handleB2ToB5AndB8ToB9Feature(Map<String, Map<String, String>> c1Feature, Map<String, Map<String, String>> adVerFeature, Map<String, String> cidFeatureMap) {
-        Map<String, String> b2Feature = adVerFeature.getOrDefault("alg_cid_feature_adver_action", new HashMap<>());
-        Map<String, String> b3Feature = c1Feature.getOrDefault("alg_cid_feature_cid_action", new HashMap<>());
-        Map<String, String> b4Feature = c1Feature.getOrDefault("alg_cid_feature_region_action", new HashMap<>());
-        Map<String, String> b5Feature = c1Feature.getOrDefault("alg_cid_feature_app_action", new HashMap<>());
-        Map<String, String> b8Feature = c1Feature.getOrDefault("alg_cid_feature_brand_action", new HashMap<>());
-        Map<String, String> b9Feature = c1Feature.getOrDefault("alg_cid_feature_weChatVersion_action", new HashMap<>());
+        Map<String, String> b2Feature = adVerFeature.getOrDefault("alg_cid_feature_adver_action", EMPTY_STRING_MAP);
+        Map<String, String> b3Feature = c1Feature.getOrDefault("alg_cid_feature_cid_action", EMPTY_STRING_MAP);
+        Map<String, String> b4Feature = c1Feature.getOrDefault("alg_cid_feature_region_action", EMPTY_STRING_MAP);
+        Map<String, String> b5Feature = c1Feature.getOrDefault("alg_cid_feature_app_action", EMPTY_STRING_MAP);
+        Map<String, String> b8Feature = c1Feature.getOrDefault("alg_cid_feature_brand_action", EMPTY_STRING_MAP);
+        Map<String, String> b9Feature = c1Feature.getOrDefault("alg_cid_feature_weChatVersion_action", EMPTY_STRING_MAP);
 
         List<String> timeList = Arrays.asList("1h", "2h", "3h", "6h", "12h", "1d", "3d", "7d", "yesterday", "today");
         List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
@@ -483,8 +493,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     }
 
     private void handleB6ToB7Feature(Map<String, Map<String, String>> c1Feature, Map<String, String> cidFeatureMap) {
-        Map<String, String> b6Feature = c1Feature.getOrDefault("alg_cid_feature_week_action", new HashMap<>());
-        Map<String, String> b7Feature = c1Feature.getOrDefault("alg_cid_feature_hour_action", new HashMap<>());
+        Map<String, String> b6Feature = c1Feature.getOrDefault("alg_cid_feature_week_action", EMPTY_STRING_MAP);
+        Map<String, String> b7Feature = c1Feature.getOrDefault("alg_cid_feature_hour_action", EMPTY_STRING_MAP);
 
         List<String> timeList = Arrays.asList("7d", "14d");
         List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
@@ -655,8 +665,8 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     private void handleH1AndH2Feature(Map<String, Map<String, String>> skuFeature,
                                       Map<String, Map<String, String>> adVerFeature,
                                       Map<String, String> cidFeatureMap) {
-        Map<String, String> h1Feature = adVerFeature.getOrDefault("alg_mid_feature_adver_action", new HashMap<>());
-        Map<String, String> h2Feature = skuFeature.getOrDefault("alg_mid_feature_sku_action", new HashMap<>());
+        Map<String, String> h1Feature = adVerFeature.getOrDefault("alg_mid_feature_adver_action", EMPTY_STRING_MAP);
+        Map<String, String> h2Feature = skuFeature.getOrDefault("alg_mid_feature_sku_action", EMPTY_STRING_MAP);
         List<String> timeList = Arrays.asList("3d", "7d", "30d");
         List<Tuple2<Map<String, String>, String>> featureList = Arrays.asList(
                 new Tuple2<>(h1Feature, "adverid"),
@@ -695,11 +705,18 @@ public class RankStrategyBy688 extends RankStrategyBasic {
     }
 
     private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title,
-                                      Map<String, String> featureMap, ScoreParam scoreParam) {
+                                      Map<String, String> featureMap, ScoreParam scoreParam,
+                                      Map<String, List<String>> tagWordsCache) {
         if (StringUtils.isEmpty(title)) {
             return;
         }
 
+        // 预先分词 title,在整个方法中复用,避免重复分词
+        List<String> titleWords = null;
+        if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
+            titleWords = SimilarityUtils.segment(title);
+        }
+
         List<Tuple2<Map<String, String>, String>> tuple2List = Arrays.asList(new Tuple2<>(e1Feature, "e1"), new Tuple2<>(e2Feature, "e2"));
 
         List<String> tagsFieldList = Arrays.asList("tags_3d", "tags_7d", "tags_14d");
@@ -713,10 +730,10 @@ public class RankStrategyBy688 extends RankStrategyBasic {
             for (String tagsField : tagsFieldList) {
                 if (StringUtils.isNotEmpty(feature.get(tagsField))) {
                     String tags = feature.get(tagsField);
-                    // Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
                     Double[] doubles;
                     if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
-                        doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                        // 使用缓存的 title 分词结果和请求级别的 tag 分词缓存
+                        doubles = ExtractorUtils.funcC34567ForTagsNewWithCache(tags, title, titleWords, tagWordsCache);
                     } else {
                         doubles = ExtractorUtils.funcC34567ForTags(tags, title);
                     }
@@ -879,27 +896,103 @@ public class RankStrategyBy688 extends RankStrategyBasic {
         }};
     }
 
+    /**
+     * Precomputed strings for bucket values, to avoid converting the same value to text repeatedly.
+     * Bucket values are typically 0.01 to 1.0 in steps of 0.01 (assumes at most 100 buckets).
+     * Built with BigDecimal so the text is exact, e.g. index 30 is "0.3" and index 100 is "1".
+     */
+    private static final String[] BUCKET_VALUE_CACHE;
+    private static final int BUCKET_CACHE_SIZE = 101; // indices 0-100 map to 0.00-1.00
+
+    static {
+        BUCKET_VALUE_CACHE = new String[BUCKET_CACHE_SIZE];
+        for (int i = 0; i < BUCKET_CACHE_SIZE; i++) {
+            // Divide with BigDecimal at scale 6 (HALF_UP) to get an exact decimal value
+            BigDecimal bd = BigDecimal.valueOf(i).divide(BigDecimal.valueOf(100), 6, RoundingMode.HALF_UP);
+            BUCKET_VALUE_CACHE[i] = bd.stripTrailingZeros().toPlainString();
+        }
+    }
+
+    /**
+     * Renders bucketIndex / bucketNum as a plain decimal string, using the precomputed cache when the value matches.
+     * @param bucketIndex bucket index (1-based per the original note; the visible caller passes position + 1)
+     * @param bucketNum total number of buckets; NOTE(review): a value of 0 would make the division throw ArithmeticException
+     * @return the value with trailing zeros stripped (a whole value such as 1 renders as "1"), at most 8 decimal places
+     */
+    private String bucketValueToString(int bucketIndex, double bucketNum) {
+        // Bucket value = bucketIndex / bucketNum
+        // Computed with BigDecimal at scale 8 (HALF_UP); this division runs even when the cache is hit below
+        BigDecimal value = BigDecimal.valueOf(bucketIndex).divide(BigDecimal.valueOf(bucketNum), 8, RoundingMode.HALF_UP);
+
+        // Try the cache, which holds exact multiples of 0.01
+        double doubleValue = value.doubleValue();
+        int cacheIndex = (int) Math.round(doubleValue * 100);
+        if (cacheIndex >= 0 && cacheIndex < BUCKET_CACHE_SIZE) {
+            // Accept the cached string only if it is within 1E-6 of the computed value
+            double cachedValue = cacheIndex / 100.0;
+            if (Math.abs(doubleValue - cachedValue) < 1E-6) {
+                return BUCKET_VALUE_CACHE[cacheIndex];
+            }
+        }
+
+        // Cache miss: format the BigDecimal directly
+        return value.stripTrailingZeros().toPlainString();
+    }
+
+    /**
+     * Lenient double parser: returns 0.0 for null, empty, or unparsable input instead of throwing.
+     * NOTE(review): despite the name there is no fast path; every non-empty input goes through Double.parseDouble.
+     */
+    private double fastParseDouble(String s) {
+        if (s == null || s.isEmpty()) {
+            return 0.0;
+        }
+
+        try {
+            // Delegates straight to Double.parseDouble
+            // Most feature values are expected to be simple decimals such as "0.123" or "1.5"
+            return Double.parseDouble(s);
+        } catch (NumberFormatException e) {
+            return 0.0;
+        }
+    }
+
     private Map<String, String> featureBucket(Map<String, String> featureMap) {
-        Map<String, String> newFeatureMap = new ConcurrentHashMap<>(featureMap.size());
+        // 使用 HashMap 替代 ConcurrentHashMap,分桶操作是单线程的
+        Map<String, String> newFeatureMap = new HashMap<>(featureMap.size());
         for (Map.Entry<String, String> entry : featureMap.entrySet()) {
             try {
                 String name = entry.getKey();
+                String value = entry.getValue();
+
+                // 稀疏特征直接复制
                 if (this.sparseFeatureSet.contains(name)) {
-                    if (entry.getValue() != null) {
-                        newFeatureMap.put(name, entry.getValue());
+                    if (value != null) {
+                        newFeatureMap.put(name, value);
                     }
                     continue;
                 }
-                double score = Double.parseDouble(entry.getValue());
+
+                // 空值跳过
+                if (value == null || value.isEmpty()) {
+                    continue;
+                }
+
+                double score = fastParseDouble(value);
+
                 // 注意:0值、不在分桶文件中的特征,会被过滤掉。
                 if (score > 1E-8) {
-                    if (this.bucketsMap.containsKey(name) && this.bucketsLen.containsKey(name)) {
-                        double[] buckets = this.bucketsMap.get(name);
-                        double bucketNum = this.bucketsLen.get(name);
-                        Double scoreNew = 1.0 / bucketNum * (ExtractorUtils.findInsertPosition(buckets, score) + 1.0);
-                        newFeatureMap.put(name, String.valueOf(scoreNew));
+                    double[] buckets = this.bucketsMap.get(name);
+                    Double bucketNum = this.bucketsLen.get(name);
+
+                    if (buckets != null && bucketNum != null) {
+                        int position = ExtractorUtils.findInsertPosition(buckets, score);
+                        // 使用优化的字符串转换方法
+                        String scoreNewStr = bucketValueToString(position + 1, bucketNum);
+                        newFeatureMap.put(name, scoreNewStr);
                     } else {
-                        newFeatureMap.put(name, String.valueOf(score));
+                        // 不在分桶文件中的特征,保持原值
+                        newFeatureMap.put(name, value);
                     }
                 }
             } catch (Exception e) {