|  | @@ -15,6 +15,7 @@ import com.tzld.longarticle.recommend.server.service.recall.strategy.DefaultReca
 | 
	
		
			
				|  |  |  import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
 | 
	
		
			
				|  |  |  import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
 | 
	
		
			
				|  |  |  import com.tzld.longarticle.recommend.server.util.JSONUtils;
 | 
	
		
			
				|  |  | +import com.tzld.longarticle.recommend.server.util.Md5Util;
 | 
	
		
			
				|  |  |  import lombok.extern.slf4j.Slf4j;
 | 
	
		
			
				|  |  |  import org.apache.commons.collections4.CollectionUtils;
 | 
	
		
			
				|  |  |  import org.springframework.beans.BeanUtils;
 | 
	
	
		
			
				|  | @@ -121,24 +122,56 @@ public class RecallService implements ApplicationContextAware {
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      public void setContentCategory(List<Content> contentList) {
 | 
	
		
			
				|  |  |          long start = System.currentTimeMillis();
 | 
	
		
			
				|  |  | -        List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId).collect(Collectors.toList());
 | 
	
		
			
				|  |  | -        List<CrawlerMetaArticle> categoryList = getContentCategoryByChannelContentId(channelContentIds);
 | 
	
		
			
				|  |  | +        Map<String, String> articleMd5Map = new HashMap<>();
 | 
	
		
			
				|  |  | +        List<String> md5List = new ArrayList<>();
 | 
	
		
			
				|  |  | +        for (Content content : contentList) {
 | 
	
		
			
				|  |  | +            String md5 = generateArticleUniqueMd5(content.getCrawlerLink());
 | 
	
		
			
				|  |  | +            md5List.add(md5);
 | 
	
		
			
				|  |  | +            articleMd5Map.put(content.getId(), md5);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        List<CrawlerMetaArticle> categoryList = getByUniqueIndexIn(md5List);
 | 
	
		
			
				|  |  |          if (CollectionUtils.isEmpty(categoryList)) {
 | 
	
		
			
				|  |  |              return;
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        Map<String, List<String>> categoryMap = categoryList.stream().collect(Collectors.groupingBy(CrawlerMetaArticle::getChannelContentId,
 | 
	
		
			
				|  |  | -                Collectors.mapping(CrawlerMetaArticle::getCategory, Collectors.toList())));
 | 
	
		
			
				|  |  | +        Map<String, String> categoryMap = categoryList.stream().collect(
 | 
	
		
			
				|  |  | +                Collectors.toMap(CrawlerMetaArticle::getUniqueIndex, CrawlerMetaArticle::getCategory));
 | 
	
		
			
				|  |  |          for (Content content : contentList) {
 | 
	
		
			
				|  |  | -            content.setCategory(categoryMap.get(content.getCrawlerChannelContentId()));
 | 
	
		
			
				|  |  | +            String md5 = articleMd5Map.get(content.getId());
 | 
	
		
			
				|  |  | +            content.setCategory(categoryMap.get(md5));
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  |          log.info("setContentCategory cost:{}", System.currentTimeMillis() - start);
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    private List<CrawlerMetaArticle> getContentCategoryByChannelContentId(List<String> channelContentIds) {
 | 
	
		
			
				|  |  | -        if (CollectionUtils.isEmpty(channelContentIds)) {
 | 
	
		
			
				|  |  | +    private List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> md5List) {
 | 
	
		
			
				|  |  | +        if (CollectionUtils.isEmpty(md5List)) {
 | 
	
		
			
				|  |  |              return new ArrayList<>();
 | 
	
		
			
				|  |  |          }
 | 
	
		
			
				|  |  | -        return crawlerMetaArticleRepository.getByChannelContentIdIn(channelContentIds);
 | 
	
		
			
				|  |  | +        return crawlerMetaArticleRepository.getByUniqueIndexIn(md5List);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    public static String generateArticleUniqueMd5(String url) {
 | 
	
		
			
				|  |  | +        // Extract parts from the URL
 | 
	
		
			
				|  |  | +        String biz = extractParameter(url, "biz=");
 | 
	
		
			
				|  |  | +        String idx = extractParameter(url, "&idx=");
 | 
	
		
			
				|  |  | +        String sn = extractParameter(url, "&sn=");
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        // Combine the parts and encode them to bytes
 | 
	
		
			
				|  |  | +        String urlBit = String.format("%s-%s-%s", biz, idx, sn);
 | 
	
		
			
				|  |  | +        return Md5Util.encoderByMd5(urlBit);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    private static String extractParameter(String url, String parameter) {
 | 
	
		
			
				|  |  | +        int start = url.indexOf(parameter) + parameter.length();
 | 
	
		
			
				|  |  | +        if (start == -1 || start == url.length()) {
 | 
	
		
			
				|  |  | +            return "";
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  | +        int end = url.indexOf("&", start);
 | 
	
		
			
				|  |  | +        if (end == -1) {
 | 
	
		
			
				|  |  | +            return url.substring(start);
 | 
	
		
			
				|  |  | +        } else {
 | 
	
		
			
				|  |  | +            return url.substring(start, end);
 | 
	
		
			
				|  |  | +        }
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      public void setTitleAvgViewCount(List<Content> contentList) {
 | 
	
	
		
			
				|  | @@ -197,4 +230,10 @@ public class RecallService implements ApplicationContextAware {
 | 
	
		
			
				|  |  |          log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
 | 
	
		
			
				|  |  |      }
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | +    public static void main(String[] args) {
 | 
	
		
			
				|  |  | +        String url = "http://mp.weixin.qq.com/s?__biz=Mzg2ODk4MTg3OQ==&mid=2247488306&idx=1&sn=93ebadc5bc7161a0dee48355013d3bc4&chksm=cfb6c1cb2bcdd80dd16d5d604d741a0019ae791125265a042d26100ba21ddb9e5c643ecc2264&scene=126&sessionid=1679649075#rd";
 | 
	
		
			
				|  |  | +        String md5 = generateArticleUniqueMd5(url);
 | 
	
		
			
				|  |  | +        System.out.println("Generated md5: " + md5);
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  |  }
 |