Browse Source

Merge branch 'feature_similarity' of algorithm/ad-engine into master

dingyunpeng 6 months ago
parent
commit
99533dbd3d

+ 5 - 0
ad-engine-commons/pom.xml

@@ -128,6 +128,11 @@
 <!--                </exclusion>-->
 <!--            </exclusions>-->
         </dependency>
+        <dependency>
+            <groupId>com.tzld.piaoquan</groupId>
+            <artifactId>recommend-similarity</artifactId>
+            <version>1.0.0</version>
+        </dependency>
     </dependencies>
 
 </project>

+ 1 - 1
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/config/SparkConfig.java

@@ -9,7 +9,7 @@ import org.springframework.stereotype.Component;
 import javax.annotation.PostConstruct;
 import java.util.Properties;
 
-@Component
+@Component("sparkConfig")
 public class SparkConfig {
 
     @PostConstruct

+ 2 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/score/ScoreParam.java

@@ -22,5 +22,7 @@ public class ScoreParam {
     private String pqtId;
     private Map<String, Object> extraParam = new HashMap<>();
 
+    private Set<String> expCodeSet;
+
 }
 

+ 25 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/util/ExtractorUtils.java

@@ -63,6 +63,31 @@ public class ExtractorUtils {
         return new Double[]{(double) d1, d3, d4};
     }
 
+    /**
+     * Computes tag/title match features, scoring every tag against the title with
+     * {@link SimilarityUtils#word2VecSimilarity(String, String)}.
+     *
+     * @param tags  comma-separated tag list; must not be {@code null}
+     * @param title title each tag is compared against; must not be {@code null}
+     * @return a three-element array: {@code [0]} the number of tags contained verbatim in the
+     *         title, {@code [1]} the highest similarity score seen (starts at 0, so never
+     *         negative), {@code [2]} the mean similarity score over all tags
+     */
+    public static Double[] funcC34567ForTagsNew(String tags, String title) {
+        String[] tagsList = tags.split(",");
+        int matchNum = 0;
+        double maxScore = 0.0;
+        double scoreSum = 0.0;
+
+        for (String tag : tagsList) {
+            if (title.contains(tag)) {
+                matchNum++;
+            }
+            double score = SimilarityUtils.word2VecSimilarity(tag, title);
+            if (score > maxScore) {
+                maxScore = score;
+            }
+            scoreSum += score;
+        }
+
+        // split() can return an empty array (e.g. for ","), so guard the division.
+        double avgScore = (tagsList.length > 0) ? scoreSum / tagsList.length : scoreSum;
+
+        // Return the three features packed into a single array.
+        return new Double[]{(double) matchNum, maxScore, avgScore};
+    }
+
     public static Double calDiv(double a, double b) {
         if (a == 0 || b == 0) {
             return 0D;

+ 2 - 1
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/util/PropertiesUtil.java

@@ -1,10 +1,11 @@
 package com.tzld.piaoquan.ad.engine.commons.util;
 
 import org.springframework.context.EnvironmentAware;
+import org.springframework.core.annotation.Order;
 import org.springframework.core.env.Environment;
 import org.springframework.stereotype.Component;
 
-@Component
+@Component("propertiesUtil")
 public class PropertiesUtil implements EnvironmentAware {
 
 

+ 63 - 0
ad-engine-commons/src/main/java/com/tzld/piaoquan/ad/engine/commons/util/SimilarityUtils.java

@@ -0,0 +1,63 @@
+package com.tzld.piaoquan.ad.engine.commons.util;
+
+import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
+import com.tzld.piaoquan.recommend.similarity.word2vec.Word2Vec;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Static helper that scores the similarity of two strings with a Word2Vec model.
+ *
+ * <p>{@link #init()} starts a background task that downloads the model from OSS and retries
+ * every five minutes until one load succeeds. Until then {@link #word2VecSimilarity} runs
+ * against the empty {@code Word2Vec} instance created at class-load time.
+ *
+ * @author dyp
+ */
+@Slf4j
+public final class SimilarityUtils {
+
+    // volatile: the reference is replaced by the loader thread and read by caller threads,
+    // so the swap must be published safely.
+    private static volatile Word2Vec vec = new Word2Vec();
+
+    private static final AtomicBoolean modelLoaded = new AtomicBoolean(false);
+    private static final AtomicBoolean init = new AtomicBoolean(false);
+
+    private static final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
+
+    private SimilarityUtils() {
+        // static utility class, not meant to be instantiated
+    }
+
+    /**
+     * Starts the model loading task. Only the first call has any effect; later calls return
+     * immediately.
+     */
+    public static void init() {
+        if (init.compareAndSet(false, true)) {
+            // Result is discarded; presumably this warms up the segmenter - TODO confirm.
+            Segment.getWords("1");
+            scheduler.scheduleAtFixedRate(() -> {
+                try {
+                    long start = System.currentTimeMillis();
+                    String endpoint = PropertiesUtil.getString("oss.endpoint");
+                    String bucketName = "art-recommend";
+                    String path = "similarity/word2vec/Google_word2vec_zhwiki210720_300d.bin";
+                    // Credentials come from configuration instead of being hard-coded here.
+                    // NOTE(review): assumes every active profile defines oss.adplatform.* - confirm.
+                    String accessKeyId = PropertiesUtil.getString("oss.adplatform.accessKey");
+                    String accessKeySecret = PropertiesUtil.getString("oss.adplatform.secretKey");
+                    Word2Vec temp = new Word2Vec();
+                    temp.loadGoogleModelFromOss(endpoint, bucketName, path, accessKeyId, accessKeySecret);
+                    vec = temp;
+                    long end = System.currentTimeMillis();
+
+                    if (modelLoaded.compareAndSet(false, true)) {
+                        // One successful load is enough: stop the periodic retry.
+                        scheduler.shutdown();
+                        log.info("Model loaded successfully cost {}. Scheduled tasks cancelled.", end - start);
+                    }
+
+                } catch (Exception e) {
+                    // Catch everything: an exception escaping a scheduleAtFixedRate task
+                    // suppresses all later executions, which would silently end the retries.
+                    log.error("loadGoogleModelFromOss error", e);
+                }
+            }, 0, 5, TimeUnit.MINUTES);
+        }
+    }
+
+
+    /**
+     * Segments both strings into words and returns the sentence similarity reported by the
+     * currently installed model.
+     *
+     * @param str1 first text
+     * @param str2 second text
+     * @return similarity score as computed by {@code Word2Vec#sentenceSimilarity}
+     */
+    public static float word2VecSimilarity(String str1, String str2) {
+        List<String> words1 = Segment.getWords(str1);
+        List<String> words2 = Segment.getWords(str2);
+        return vec.sentenceSimilarity(words1, words2);
+    }
+
+
+}

+ 0 - 1
ad-engine-server/src/main/java/com/tzld/piaoquan/ad/engine/server/Application.java

@@ -32,7 +32,6 @@ import org.springframework.scheduling.annotation.EnableScheduling;
 public class Application {
     public static void main(String[] args) {
         SpringApplication.run(Application.class, args);
-        ScorerUtils.warmUp();
     }
 
 }

+ 23 - 0
ad-engine-server/src/main/java/com/tzld/piaoquan/ad/engine/server/WarmupService.java

@@ -0,0 +1,23 @@
+package com.tzld.piaoquan.ad.engine.server;
+
+import com.tzld.piaoquan.ad.engine.commons.score.ScorerUtils;
+import com.tzld.piaoquan.ad.engine.commons.util.PropertiesUtil;
+import com.tzld.piaoquan.ad.engine.commons.util.SimilarityUtils;
+import org.springframework.context.annotation.DependsOn;
+import org.springframework.stereotype.Component;
+
+import javax.annotation.PostConstruct;
+
+/**
+ * Spring component that performs start-up warm-up work.
+ *
+ * <p>It is declared to depend on the {@code propertiesUtil} and {@code sparkConfig} beans, so
+ * those are initialized before this one. Its {@code @PostConstruct} method {@code warmup()}
+ * then calls {@code ScorerUtils.warmUp()} followed by {@code SimilarityUtils.init()}.
+ *
+ * @author dyp
+ */
+@Component
+@DependsOn({"propertiesUtil", "sparkConfig"})
+public class WarmupService {
+
+    @PostConstruct
+    public void warmup() {
+        ScorerUtils.warmUp();
+        SimilarityUtils.init();
+    }
+}

+ 2 - 1
ad-engine-server/src/main/resources/application-dev.yml

@@ -4,7 +4,7 @@ server:
 eureka:
   client:
     serviceUrl:
-      defaultZone: http://127.0.0.1:7000/eureka/
+      defaultZone: http://deveureka-internal.piaoquantv.com/eureka/
 
 datalog: .
 
@@ -145,6 +145,7 @@ ms:
     getByIpUrl: http://testapi-internal.piaoquantv.com/base-service/region/getByIp
 
 oss:
+  endpoint: oss-cn-hangzhou.aliyuncs.com
   adplatform:
     accessKey: LTAIP6x1l3DXfSxm
     secretKey: KbTaM9ars4OX3PMS6Xm7rtxGr1FLon

+ 1 - 0
ad-engine-server/src/main/resources/application-pre.yml

@@ -131,6 +131,7 @@ ms:
     getByIpUrl: http://preapi-internal.piaoquantv.com/base-service/region/getByIp
 
 oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
   adplatform:
     accessKey: LTAIP6x1l3DXfSxm
     secretKey: KbTaM9ars4OX3PMS6Xm7rtxGr1FLon

+ 1 - 0
ad-engine-server/src/main/resources/application-prod.yml

@@ -133,6 +133,7 @@ ms:
     getByIpUrl: http://api-internal.piaoquantv.com/base-service/region/getByIp
 
 oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
   adplatform:
     accessKey: LTAIP6x1l3DXfSxm
     secretKey: KbTaM9ars4OX3PMS6Xm7rtxGr1FLon

+ 1 - 0
ad-engine-server/src/main/resources/application-test.yml

@@ -127,6 +127,7 @@ ms:
     getByIpUrl: http://testapi-internal.piaoquantv.com/base-service/region/getByIp
 
 oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
   adplatform:
     accessKey: LTAIP6x1l3DXfSxm
     secretKey: KbTaM9ars4OX3PMS6Xm7rtxGr1FLon

+ 0 - 1
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/predict/container/RandWContainer.java

@@ -87,7 +87,6 @@ public class RandWContainer {
             String[] arr=str.split("_");
             randW=Integer.parseInt(arr[0]);
             this.cacheDate=new Date(Long.parseLong(arr[1]));
-            System.out.println("randW="+randW);
         }catch (Exception e){
             log.error("svc=load_randomW status=failed error={}", Arrays.toString(e.getStackTrace()));
             e.printStackTrace();

+ 23 - 6
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/RankStrategyXGBAutoUpdateModel688.java

@@ -12,6 +12,7 @@ import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.MapUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Component;
 import org.xm.Similarity;
 
@@ -37,6 +38,9 @@ public class RankStrategyXGBAutoUpdateModel688 extends RankStrategyXGBBasic {
 
     private Map<String, Double> bucketsLen = new HashMap<>();
 
+    @Value("${word2vec.exp:694}")
+    private String word2vecExp;
+
     @Override
     public List<AdRankItem> adItemRank(RankRecommendRequestParam request, ScoreParam scoreParam) {
 
@@ -140,14 +144,14 @@ public class RankStrategyXGBAutoUpdateModel688 extends RankStrategyXGBBasic {
             String title = b1Feature.getOrDefault("cidtitle", "");
             ThreadPoolFactory.defaultPool().submit(() -> {
                 try {
-                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap());
+                    this.handleE1AndE2Feature(e1Feature, e2Feature, title, item.getFeatureMap(), scoreParam);
                 } finally {
                     cdl2.countDown();
                 }
             });
             ThreadPoolFactory.defaultPool().submit(() -> {
                 try {
-                    this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap());
+                    this.handleD3AndB1Feature(d3Feature, title, item.getFeatureMap(), scoreParam);
                 } finally {
                     cdl2.countDown();
                 }
@@ -434,16 +438,23 @@ public class RankStrategyXGBAutoUpdateModel688 extends RankStrategyXGBBasic {
         }
     }
 
-    private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap) {
+    /**
+     * Writes the {@code ctitle_vtitle_similarity} feature: the similarity between the creative
+     * title and the video title found under {@code "title"} in {@code d3Feature}. Nothing is
+     * written when {@code d3Feature} is empty or has no title, or when {@code cTitle} is empty.
+     *
+     * @param d3Feature  feature map expected to hold the video title under {@code "title"}
+     * @param cTitle     creative title
+     * @param featureMap output map that receives the similarity feature
+     * @param scoreParam request parameters; its experiment code set selects the algorithm
+     */
+    private void handleD3AndB1Feature(Map<String, String> d3Feature, String cTitle, Map<String, String> featureMap,
+                                      ScoreParam scoreParam) {
         if (MapUtils.isEmpty(d3Feature) || !d3Feature.containsKey("title") || StringUtils.isEmpty(cTitle)) {
             return;
         }
         String vTitle = d3Feature.get("title");
-        double score = Similarity.conceptSimilarity(cTitle, vTitle);
+        double score;
+        // expCodeSet has no default value in ScoreParam, so a null set is treated as
+        // "not in the word2vec experiment" instead of throwing inside the pool task.
+        if (scoreParam.getExpCodeSet() != null && scoreParam.getExpCodeSet().contains(word2vecExp)) {
+            score = SimilarityUtils.word2VecSimilarity(cTitle, vTitle);
+        } else {
+            score = Similarity.conceptSimilarity(cTitle, vTitle);
+        }
         featureMap.put("ctitle_vtitle_similarity", String.valueOf(score));
     }
 
-    private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title, Map<String, String> featureMap) {
+    private void handleE1AndE2Feature(Map<String, String> e1Feature, Map<String, String> e2Feature, String title,
+                                      Map<String, String> featureMap, ScoreParam scoreParam) {
         if (StringUtils.isEmpty(title)) {
             return;
         }
@@ -461,7 +472,13 @@ public class RankStrategyXGBAutoUpdateModel688 extends RankStrategyXGBBasic {
             for (String tagsField : tagsFieldList) {
                 if (StringUtils.isNotEmpty(feature.get(tagsField))) {
                     String tags = feature.get(tagsField);
-                    Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    //Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    Double[] doubles;
+                    if (scoreParam.getExpCodeSet().contains(word2vecExp)) {
+                        doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                    } else {
+                        doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                    }
                     featureMap.put(prefix + "_" + tagsField + "_matchnum", String.valueOf(doubles[0]));
                     featureMap.put(prefix + "_" + tagsField + "_maxscore", String.valueOf(doubles[1]));
                     featureMap.put(prefix + "_" + tagsField + "_avgscore", String.valueOf(doubles[2]));

+ 6 - 0
ad-engine-service/src/main/java/com/tzld/piaoquan/ad/engine/service/score/convert/RequestConvert.java

@@ -1,6 +1,7 @@
 package com.tzld.piaoquan.ad.engine.service.score.convert;
 
 import com.tzld.piaoquan.ad.engine.commons.score.ScoreParam;
+import com.tzld.piaoquan.ad.engine.commons.util.AbUtil;
 import com.tzld.piaoquan.ad.engine.service.score.dto.AdPlatformCreativeDTO;
 import com.tzld.piaoquan.ad.engine.service.score.param.RecommendRequestParam;
 import com.tzld.piaoquan.recommend.feature.domain.ad.base.AdRankItem;
@@ -10,6 +11,7 @@ import java.time.LocalDateTime;
 import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Set;
 
 public class RequestConvert {
 
@@ -36,6 +38,10 @@ public class RequestConvert {
         scoreParam.setNewExpGroup(request.getNewExpGroup());
         scoreParam.setPqtId(request.getPqtId());
         scoreParam.setMid(request.getMid());
+
+        Set<String> expCodeSet = AbUtil.unfoldAllExpCode(request.getAdAbExpArr());
+        scoreParam.setExpCodeSet(expCodeSet);
+
         return scoreParam;
     }