|
@@ -15,6 +15,7 @@ import com.tzld.longarticle.recommend.server.service.recall.strategy.DefaultReca
|
|
|
import com.tzld.longarticle.recommend.server.service.score.AvgReadDTO;
|
|
|
import com.tzld.longarticle.recommend.server.util.CommonCollectionUtils;
|
|
|
import com.tzld.longarticle.recommend.server.util.JSONUtils;
|
|
|
+import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.springframework.beans.BeanUtils;
|
|
@@ -121,24 +122,56 @@ public class RecallService implements ApplicationContextAware {
|
|
|
|
|
|
public void setContentCategory(List<Content> contentList) {
|
|
|
long start = System.currentTimeMillis();
|
|
|
- List<String> channelContentIds = contentList.stream().map(Content::getCrawlerChannelContentId).collect(Collectors.toList());
|
|
|
- List<CrawlerMetaArticle> categoryList = getContentCategoryByChannelContentId(channelContentIds);
|
|
|
+ Map<String, String> articleMd5Map = new HashMap<>();
|
|
|
+ List<String> md5List = new ArrayList<>();
|
|
|
+ for (Content content : contentList) {
|
|
|
+ String md5 = generateArticleUniqueMd5(content.getCrawlerLink());
|
|
|
+ md5List.add(md5);
|
|
|
+ articleMd5Map.put(content.getId(), md5);
|
|
|
+ }
|
|
|
+ List<CrawlerMetaArticle> categoryList = getByUniqueIndexIn(md5List);
|
|
|
if (CollectionUtils.isEmpty(categoryList)) {
|
|
|
return;
|
|
|
}
|
|
|
- Map<String, List<String>> categoryMap = categoryList.stream().collect(Collectors.groupingBy(CrawlerMetaArticle::getChannelContentId,
|
|
|
- Collectors.mapping(CrawlerMetaArticle::getCategory, Collectors.toList())));
|
|
|
+ Map<String, String> categoryMap = categoryList.stream().collect(
|
|
|
+ Collectors.toMap(CrawlerMetaArticle::getUniqueIndex, CrawlerMetaArticle::getCategory));
|
|
|
for (Content content : contentList) {
|
|
|
- content.setCategory(categoryMap.get(content.getCrawlerChannelContentId()));
|
|
|
+ String md5 = articleMd5Map.get(content.getId());
|
|
|
+ content.setCategory(categoryMap.get(md5));
|
|
|
}
|
|
|
log.info("setContentCategory cost:{}", System.currentTimeMillis() - start);
|
|
|
}
|
|
|
|
|
|
- private List<CrawlerMetaArticle> getContentCategoryByChannelContentId(List<String> channelContentIds) {
|
|
|
- if (CollectionUtils.isEmpty(channelContentIds)) {
|
|
|
+ private List<CrawlerMetaArticle> getByUniqueIndexIn(List<String> md5List) {
|
|
|
+ if (CollectionUtils.isEmpty(md5List)) {
|
|
|
return new ArrayList<>();
|
|
|
}
|
|
|
- return crawlerMetaArticleRepository.getByChannelContentIdIn(channelContentIds);
|
|
|
+ return crawlerMetaArticleRepository.getByUniqueIndexIn(md5List);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ public static String generateArticleUniqueMd5(String url) {
|
|
|
+ // Extract parts from the URL
|
|
|
+ String biz = extractParameter(url, "biz=");
|
|
|
+ String idx = extractParameter(url, "&idx=");
|
|
|
+ String sn = extractParameter(url, "&sn=");
|
|
|
+
|
|
|
+ // Combine the parts and encode them to bytes
|
|
|
+ String urlBit = String.format("%s-%s-%s", biz, idx, sn);
|
|
|
+ return Md5Util.encoderByMd5(urlBit);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String extractParameter(String url, String parameter) {
|
|
|
+ int start = url.indexOf(parameter) + parameter.length();
|
|
|
+ if (start == -1 || start == url.length()) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+ int end = url.indexOf("&", start);
|
|
|
+ if (end == -1) {
|
|
|
+ return url.substring(start);
|
|
|
+ } else {
|
|
|
+ return url.substring(start, end);
|
|
|
+ }
|
|
|
}
|
|
|
|
|
|
public void setTitleAvgViewCount(List<Content> contentList) {
|
|
@@ -197,4 +230,10 @@ public class RecallService implements ApplicationContextAware {
|
|
|
log.info("setTitleAvgViewCount cost:{}", System.currentTimeMillis() - start);
|
|
|
}
|
|
|
|
|
|
+ public static void main(String[] args) {
|
|
|
+ String url = "http://mp.weixin.qq.com/s?__biz=Mzg2ODk4MTg3OQ==&mid=2247488306&idx=1&sn=93ebadc5bc7161a0dee48355013d3bc4&chksm=cfb6c1cb2bcdd80dd16d5d604d741a0019ae791125265a042d26100ba21ddb9e5c643ecc2264&scene=126&sessionid=1679649075#rd";
|
|
|
+ String md5 = generateArticleUniqueMd5(url);
|
|
|
+ System.out.println("Generated md5: " + md5);
|
|
|
+ }
|
|
|
+
|
|
|
}
|