Ver Fonte

品类查询修改

wangyunpeng há 10 meses atrás
pai
commit
40bf3b922b

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/Content.java

@@ -30,7 +30,7 @@ public class Content {
      */
     private String contentPoolType; // 内容池类别
     private String crawlerChannelContentId; // 抓取内容channelContentId
-    private List<String> category; // 品类
+    private String category; // 品类
     private String crawlerLink;
     private String crawlerTitle;
     private String crawlerCoverUrl;

+ 3 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/aigc/CrawlerMetaArticleRepository.java

@@ -9,6 +9,8 @@ import java.util.List;
 @Repository
 public interface CrawlerMetaArticleRepository extends JpaRepository<CrawlerMetaArticle, Long> {
 
-    List<CrawlerMetaArticle> getByChannelContentIdIn(List<String>channelContentIds);
+    // Returns the articles whose channel content id is in the given list
+    // (presumably resolved as a Spring Data derived "In" query - confirm).
+    List<CrawlerMetaArticle> getByChannelContentIdIn(List<String> channelContentIds);
+
+    // Returns the articles whose unique index is in the given list; RecallService passes
+    // MD5 keys derived from article URLs here.
+    List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> uniqueIndexList);
 
 }

+ 47 - 8
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recall/RecallService.java

@@ -15,6 +15,7 @@ import com.tzld.longarticle.recommend.server.service.recall.strategy.DefaultReca
 import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
 import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 import com.tzld.longarticle.recommend.server.util.JSONUtils;
+import com.tzld.longarticle.recommend.server.util.Md5Util;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.BeanUtils;
@@ -121,24 +122,56 @@ public class RecallService implements ApplicationContextAware {
 
     public void setContentCategory(List<Content> contentList) {
         long start = System.currentTimeMillis();
-        List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId).collect(Collectors.toList());
-        List<CrawlerMetaArticle> categoryList = getContentCategoryByChannelContentId(channelContentIds);
+        Map<String, String> articleMd5Map = new HashMap<>();
+        List<String> md5List = new ArrayList<>();
+        for (Content content : contentList) {
+            String md5 = generateArticleUniqueMd5(content.getCrawlerLink());
+            md5List.add(md5);
+            articleMd5Map.put(content.getId(), md5);
+        }
+        List<CrawlerMetaArticle> categoryList = getByUniqueIndexIn(md5List);
         if (CollectionUtils.isEmpty(categoryList)) {
             return;
         }
-        Map<String, List<String>> categoryMap = categoryList.stream().collect(Collectors.groupingBy(CrawlerMetaArticle::getChannelContentId,
-                Collectors.mapping(CrawlerMetaArticle::getCategory, Collectors.toList())));
+        Map<String, String> categoryMap = categoryList.stream().collect(
+                Collectors.toMap(CrawlerMetaArticle::getUniqueIndex, CrawlerMetaArticle::getCategory));
         for (Content content : contentList) {
-            content.setCategory(categoryMap.get(content.getCrawlerChannelContentId()));
+            String md5 = articleMd5Map.get(content.getId());
+            content.setCategory(categoryMap.get(md5));
         }
         log.info("setContentCategory cost:{}", System.currentTimeMillis() - start);
     }
 
-    private List<CrawlerMetaArticle> getContentCategoryByChannelContentId(List<String> channelContentIds) {
-        if (CollectionUtils.isEmpty(channelContentIds)) {
+    // Fetches the crawler meta articles whose unique index is contained in md5List.
+    // An empty list short-circuits to an empty result without calling the repository.
+    private List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> md5List) {
+        if (CollectionUtils.isEmpty(md5List)) {
             return new ArrayList<>();
         }
-        return crawlerMetaArticleRepository.getByChannelContentIdIn(channelContentIds);
+        return crawlerMetaArticleRepository.getByUniqueIndexIn(md5List);
+    }
+
+
+    /**
+     * Builds the lookup key for an article URL.
+     * <p>
+     * The values following "biz=", "&idx=" and "&sn=" in the URL are joined as
+     * "biz-idx-sn" and that string is handed to Md5Util.encoderByMd5.
+     *
+     * @param url article URL carrying the biz, idx and sn query parameters
+     * @return the result of Md5Util.encoderByMd5 for the joined parts
+     */
+    public static String generateArticleUniqueMd5(String url) {
+        String bizPart = extractParameter(url, "biz=");
+        String idxPart = extractParameter(url, "&idx=");
+        String snPart = extractParameter(url, "&sn=");
+        return Md5Util.encoderByMd5(String.format("%s-%s-%s", bizPart, idxPart, snPart));
+    }
+
+    /**
+     * Returns the text that follows {@code parameter} in {@code url}, up to (not including)
+     * the next '&' or the end of the string.
+     *
+     * @param url       URL to search; a null URL is treated as containing no parameters
+     * @param parameter marker to look for, including its trailing '=' (for example "&idx=")
+     * @return the value after the marker, or "" when the marker is absent or has no value
+     */
+    private static String extractParameter(String url, String parameter) {
+        if (url == null) {
+            return "";
+        }
+        // Check the raw indexOf result: once parameter.length() is added, the -1 "not found"
+        // value can no longer be detected and an unrelated slice of the URL would be returned.
+        int markerPos = url.indexOf(parameter);
+        if (markerPos == -1) {
+            return "";
+        }
+        int start = markerPos + parameter.length();
+        int end = url.indexOf("&", start);
+        if (end == -1) {
+            return url.substring(start);
+        } else {
+            return url.substring(start, end);
+        }
     }
 
     public void setTitleAvgViewCount(List<Content> contentList) {
@@ -197,4 +230,10 @@ public class RecallService implements ApplicationContextAware {
         log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
     }
 
+    /** Manual smoke check: prints the MD5 key generated for a sample article URL. */
+    public static void main(String[] args) {
+        String sampleUrl = "http://mp.weixin.qq.com/s?__biz=Mzg2ODk4MTg3OQ==&mid=2247488306&idx=1&sn=93ebadc5bc7161a0dee48355013d3bc4&chksm=cfb6c1cb2bcdd80dd16d5d604d741a0019ae791125265a042d26100ba21ddb9e5c643ecc2264&scene=126&sessionid=1679649075#rd";
+        System.out.println("Generated md5: " + generateArticleUniqueMd5(sampleUrl));
+    }
+
 }