丁云鹏 6 months ago
parent
commit
4e5cb6297a

+ 5 - 0
recommend-server-service/pom.xml

@@ -265,6 +265,11 @@
             <artifactId>spark-mllib_2.12</artifactId>
             <version>3.3.1</version>
         </dependency>
+        <dependency>
+            <groupId>com.tzld.piaoquan</groupId>
+            <artifactId>recommend-similarity</artifactId>
+            <version>1.0.0</version>
+        </dependency>
     </dependencies>
 
 

+ 4 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/WarmUpService.java

@@ -1,6 +1,8 @@
 package com.tzld.piaoquan.recommend.server.service;
 
 import com.tzld.piaoquan.recommend.server.repository.WxVideoStatusRepository;
+import com.tzld.piaoquan.recommend.server.util.SimilarityUtils;
+import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.beans.factory.annotation.Qualifier;
 import org.springframework.core.annotation.Order;
@@ -35,5 +37,7 @@ public class WarmUpService {
         com.tzld.piaoquan.recommend.server.service.score.ScorerUtils.warmUp();
         com.tzld.piaoquan.recommend.server.framework.score.ScorerUtils.warmUp();
         wxVideoStatusRepository.count();
+        SimilarityUtils.init();
+        Segment.getWords("1");
     }
 }

+ 1 - 1
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/filter/AbstractFilterService.java

@@ -111,7 +111,7 @@ public abstract class AbstractFilterService {
         strategies.add(ServiceBeanFactory.getBean(SecurityStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(PreViewedStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(ViewedStrategy.class));
-        strategies.add(ServiceBeanFactory.getBean(RecommendStatusStrategy.class));
+        //strategies.add(ServiceBeanFactory.getBean(RecommendStatusStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(SupplyExpStrategy.class));
         strategies.add(ServiceBeanFactory.getBean(AppletVideoStatusStrategy.class));
         switch (param.getAppType()) {

+ 3 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/RankService.java

@@ -50,6 +50,9 @@ public class RankService {
     @ApolloJsonValue("${region.recall.return.size:{}}")
     protected Map<String, Map<String, Integer>> regionRecallReturnSize;
 
+    // AB experiment code whose presence in a request's experiment codes switches the tag/title
+    // similarity features to the word2vec implementation. There is deliberately no space after
+    // ':' in the placeholder: Spring keeps whitespace in a placeholder default verbatim, so
+    // ": 692" would produce " 692", which never equals the experiment code "692".
+    @Value("${word2vec.exp:692}")
+    protected String word2vecExp;
+
     public RankResult rank(RankParam param) {
         if (param == null
                 || param.getRecallResult() == null

+ 29 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/extractor/ExtractorUtils.java

@@ -5,6 +5,8 @@ import java.time.format.DateTimeFormatter;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Map;
+
+import com.tzld.piaoquan.recommend.server.util.SimilarityUtils;
 import org.xm.Similarity;
 public class ExtractorUtils {
 
@@ -59,6 +61,33 @@ public class ExtractorUtils {
         Double[] result = {(double) d1, d3, d4};
         return result;
     }
+
+    /**
+     * Computes tag/title match features using word2vec similarity.
+     *
+     * <p>{@code tags} is split on commas; each tag is tested for literal containment in
+     * {@code title} and scored against it with {@code SimilarityUtils.word2VecSimilarity}.
+     *
+     * @param tags  comma-separated tags
+     * @param title title each tag is compared against
+     * @return a three-element array: the number of tags contained in the title, the highest
+     *         similarity score (never below 0.0), and the mean similarity score over all tags
+     */
+    public static Double[] funcC34567ForTagsNew(String tags, String title) {
+        String[] tagsList = tags.split(",");
+        int matchCount = 0;
+        double maxScore = 0.0;
+        double scoreSum = 0.0;
+
+        for (String tag : tagsList) {
+            if (title.contains(tag)) {
+                matchCount++;
+            }
+            double score = SimilarityUtils.word2VecSimilarity(tag, title);
+            if (score > maxScore) {
+                maxScore = score;
+            }
+            scoreSum += score;
+        }
+
+        // split(",") drops trailing empty strings and can return an empty array (e.g. for ","),
+        // so the division is guarded.
+        double avgScore = (tagsList.length > 0) ? scoreSum / tagsList.length : scoreSum;
+
+        // The three features are returned together in one array.
+        return new Double[] {(double) matchCount, maxScore, avgScore};
+    }
+
     public static Double calDiv(double a, double b){
         if (a == 0 || b == 0){
             return 0D;

+ 12 - 6
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV562.java

@@ -32,10 +32,6 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
     @ApolloJsonValue("${rank.score.merge.weightv562:}")
     private Map<String, Double> mergeWeight;
 
-
-
-
-
     @Autowired
     private FeatureService featureService;
 
@@ -241,7 +237,12 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
                             String tags = c34567Map.getOrDefault(key, "");
                             if (!tags.isEmpty()) {
                                 Future<Pair<String, Double[]>> future = ThreadPoolFactory.defaultPool().submit(() -> {
-                                    Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                                    Double[] doubles = null;
+                                    if (param.getAbExpCodes().contains(word2vecExp)) {
+                                        doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                                    } else {
+                                        doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                                    }
                                     return Pair.create(key, doubles);
                                 });
                                 futures.add(future);
@@ -263,7 +264,12 @@ public class RankStrategy4RegionMergeModelV562 extends RankStrategy4RegionMergeM
                         for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
                             String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
                             if (!tags.isEmpty()) {
-                                Double[] doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                                Double[] doubles = null;
+                                if (param.getAbExpCodes().contains(word2vecExp)) {
+                                    doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                                } else {
+                                    doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                                }
                                 featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
                                 featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
                                 featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);

+ 16 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/service/rank/strategy/RankStrategy4RegionMergeModelV565.java

@@ -228,6 +228,7 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
 
             String title = videoInfo.getOrDefault("title", "");
             if (!title.isEmpty()) {
+<<<<<<< HEAD
                 if (similarityConcurrent) {
                     List<Future<Pair<String, Double[]>>> futures = new ArrayList<>();
                     for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
@@ -263,6 +264,21 @@ public class RankStrategy4RegionMergeModelV565 extends RankStrategy4RegionMergeM
                                 featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
                                 featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
                             }
+=======
+                for (String name : Arrays.asList("c3_feature", "c4_feature", "c5_feature", "c6_feature", "c7_feature")) {
+                    for (String key_time : Arrays.asList("tags_1d", "tags_3d", "tags_7d")) {
+                        String tags = c34567Map.getOrDefault(name + "_" + key_time, "");
+                        if (!tags.isEmpty()) {
+                            Double[] doubles = null;
+                            if (param.getAbExpCodes().contains(word2vecExp)) {
+                                doubles = ExtractorUtils.funcC34567ForTagsNew(tags, title);
+                            } else {
+                                doubles = ExtractorUtils.funcC34567ForTags(tags, title);
+                            }
+                            featureMap.put(name + "_" + key_time + "_matchnum", doubles[0]);
+                            featureMap.put(name + "_" + key_time + "_maxscore", doubles[1]);
+                            featureMap.put(name + "_" + key_time + "_avgscore", doubles[2]);
+>>>>>>> 56fb133 (word2vec)
                         }
                     }
                 }

+ 62 - 0
recommend-server-service/src/main/java/com/tzld/piaoquan/recommend/server/util/SimilarityUtils.java

@@ -0,0 +1,62 @@
+package com.tzld.piaoquan.recommend.server.util;
+
+import com.tzld.piaoquan.recommend.similarity.word2vec.Segment;
+import com.tzld.piaoquan.recommend.similarity.word2vec.Word2Vec;
+import lombok.extern.slf4j.Slf4j;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.concurrent.Executors;
+import java.util.concurrent.ScheduledExecutorService;
+import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+
+/**
+ * Word2vec-based sentence similarity backed by a model downloaded from OSS.
+ *
+ * <p>{@link #init()} schedules a background task that loads the model and retries every five
+ * minutes until one load succeeds. Until then {@link #word2VecSimilarity(String, String)} uses
+ * the {@code Word2Vec} instance created at class initialization, on which this class has not
+ * loaded any model.
+ *
+ * @author dyp
+ */
+@Slf4j
+public final class SimilarityUtils {
+
+    /** Environment variables that, when set and non-empty, supply the OSS credentials. */
+    private static final String ACCESS_KEY_ID_ENV = "OSS_ACCESS_KEY_ID";
+    private static final String ACCESS_KEY_SECRET_ENV = "OSS_ACCESS_KEY_SECRET";
+
+    // SECURITY: credentials committed to source control must be treated as leaked. These
+    // literals are kept only so existing deployments keep working when the environment
+    // variables above are not set. Rotate the key, inject it via the environment, then delete
+    // these two constants.
+    private static final String LEGACY_ACCESS_KEY_ID = "LTAIP6x1l3DXfSxm";
+    private static final String LEGACY_ACCESS_KEY_SECRET = "KbTaM9ars4OX3PMS6Xm7rtxGr1FLon";
+
+    // volatile: assigned on the scheduler thread, read by callers of word2VecSimilarity.
+    private static volatile Word2Vec vec = new Word2Vec();
+
+    private static final AtomicBoolean modelLoaded = new AtomicBoolean(false);
+    private static final AtomicBoolean init = new AtomicBoolean(false);
+
+    private static final ScheduledExecutorService scheduler = Executors.newSingleThreadScheduledExecutor();
+
+    private SimilarityUtils() {
+        // static utility class, not meant to be instantiated
+    }
+
+    /**
+     * Starts the background model loader. Only the first call has any effect; later calls
+     * return immediately.
+     */
+    public static void init() {
+        if (!init.compareAndSet(false, true)) {
+            return;
+        }
+        scheduler.scheduleAtFixedRate(SimilarityUtils::loadModel, 0, 5, TimeUnit.MINUTES);
+    }
+
+    /**
+     * Downloads the model from OSS and publishes it. After the first successful load the
+     * scheduler is shut down, so no further attempts are made.
+     */
+    private static void loadModel() {
+        try {
+            long start = System.currentTimeMillis();
+            String endpoint = PropertiesUtil.getString("oss.endpoint");
+            String bucketName = "art-recommend";
+            String path = "similarity/word2vec/Google_word2vec_zhwiki210720_300d.bin";
+            String accessKeyId = envOrDefault(ACCESS_KEY_ID_ENV, LEGACY_ACCESS_KEY_ID);
+            String accessKeySecret = envOrDefault(ACCESS_KEY_SECRET_ENV, LEGACY_ACCESS_KEY_SECRET);
+
+            // Load into a fresh instance and swap the reference only once loading has finished.
+            Word2Vec temp = new Word2Vec();
+            temp.loadGoogleModelFromOss(endpoint, bucketName, path, accessKeyId, accessKeySecret);
+            vec = temp;
+            long end = System.currentTimeMillis();
+
+            if (modelLoaded.compareAndSet(false, true)) {
+                scheduler.shutdown();
+                log.info("Model loaded successfully cost {}. Scheduled tasks cancelled.", end - start);
+            }
+        } catch (Exception e) {
+            // Catch every exception, not just IOException: an exception escaping a
+            // scheduleAtFixedRate task suppresses all subsequent executions, which would
+            // silently end the retry loop.
+            log.error("loadGoogleModelFromOss error", e);
+        }
+    }
+
+    /** Returns the value of environment variable {@code name}, or {@code fallback} if it is unset or empty. */
+    private static String envOrDefault(String name, String fallback) {
+        String value = System.getenv(name);
+        return (value == null || value.isEmpty()) ? fallback : value;
+    }
+
+    /**
+     * Segments both strings with {@code Segment.getWords} and returns the sentence similarity
+     * computed by the currently published model.
+     *
+     * @param str1 first text
+     * @param str2 second text
+     * @return the value of {@code Word2Vec.sentenceSimilarity} for the two word lists
+     */
+    public static float word2VecSimilarity(String str1, String str2) {
+        List<String> words1 = Segment.getWords(str1);
+        List<String> words2 = Segment.getWords(str2);
+        return vec.sentenceSimilarity(words1, words2);
+    }
+
+}

+ 3 - 0
recommend-server-service/src/main/resources/application-dev.yml

@@ -1,6 +1,9 @@
 server:
   port: 8001
 
+oss:
+  endpoint: oss-cn-hangzhou.aliyuncs.com
+
 eureka:
   instance:
     prefer-ip-address: true #是否优先使用IP地址作为主机名的标识,默认false

+ 3 - 0
recommend-server-service/src/main/resources/application-pre.yml

@@ -1,6 +1,9 @@
 server:
   port: 8080
 
+oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
+
 eureka:
   instance:
     prefer-ip-address: true #是否优先使用IP地址作为主机名的标识,默认false

+ 3 - 0
recommend-server-service/src/main/resources/application-prod.yml

@@ -1,6 +1,9 @@
 server:
   port: 8080
 
+oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
+
 eureka:
   instance:
     prefer-ip-address: true #是否优先使用IP地址作为主机名的标识,默认false

+ 3 - 0
recommend-server-service/src/main/resources/application-test.yml

@@ -1,6 +1,9 @@
 server:
   port: 8080
 
+oss:
+  endpoint: oss-cn-hangzhou-internal.aliyuncs.com
+
 eureka:
   instance:
     prefer-ip-address: true #是否优先使用IP地址作为主机名的标识,默认false