فهرست منبع

文章品类处理

wangyunpeng 8 ماه پیش
والد
کامیت
a00f57aea4

+ 70 - 27
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -14,6 +14,7 @@ import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
 import com.tzld.longarticle.recommend.server.model.dto.kimi.KimiResult;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
@@ -46,6 +47,7 @@ import org.springframework.util.StringUtils;
 
 import java.util.*;
 import java.util.concurrent.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 /**
@@ -340,6 +342,43 @@ public class ArticleService {
 
 
     public void articleCategory() { // Entry point: runs the three article-category steps below, in this order.
+        addArticleCategoryByCrawlerPlan(); // works through crawler plans whose status is StatusEnum.ZERO
+        addArticleCategoryByProducePlan(); // inserts ArticleCategory rows for produce-plan content that has none yet
+        dealArticleCategory(); // asks Kimi for a category for every ArticleCategory row in WAITING status
+    }
+
+    /**
+     * Classifies every {@link ArticleCategory} row that is currently in WAITING status.
+     * <p>
+     * Rows are processed in partitions of 20. For each partition the distinct titles are put into
+     * a single Kimi prompt; the reply content is parsed as a JSON object and looked up by title.
+     * A row whose title is a key of that object is saved as SUCCESS with the mapped category;
+     * every other row is saved as FAIL with the fail reason reported by the Kimi result.
+     * Each row also records the raw Kimi response string and the update timestamp.
+     */
+    private void dealArticleCategory() {
+        List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
+        for (List<ArticleCategory> partition : Lists.partition(dealList, 20)) {
+            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
+            String prompt = buildKimiPrompt(partitionTitles);
+            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
+            long now = System.currentTimeMillis();
+            JSONObject obj = null;
+            if (kimiResult.isSuccess()) {
+                // The content is extracted once, inside the try, so the error path never has to
+                // walk the response chain again: a null response or empty choices list would
+                // otherwise throw a second time from the catch block and abort the whole run.
+                String content = null;
+                try {
+                    content = kimiResult.getResponse().getChoices().get(0).getMessage().getContent();
+                    obj = JSONObject.parseObject(content);
+                } catch (Exception e) {
+                    // Keep the exception in the log; obj stays null so the partition is marked FAIL below.
+                    log.error("parse kimi category result failed, content: " + content, e);
+                }
+            }
+            for (ArticleCategory articleCategory : partition) {
+                articleCategory.setKimiResult(kimiResult.getResponseStr());
+                articleCategory.setUpdateTimestamp(now);
+                // obj is only ever non-null after a successful Kimi call.
+                if (Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
+                    articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                } else {
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
+                    articleCategory.setFailReason(kimiResult.getFailReason());
+                }
+                articleCategoryRepository.save(articleCategory);
+            }
+        }
+    }
+
+    private void addArticleCategoryByCrawlerPlan() {
         List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
         for (ArticleCrawlerPlan crawlerPlan : articleCrawlerPlanList) {
             List<ProduceContentCrawlerVO> list = crawlerContentByPlanService.getCrawlerContentByPlan(crawlerPlan.getCrawlerPlanId(), producePlanIds);
@@ -370,35 +409,39 @@ public class ArticleService {
                 articleCrawlerPlanRepository.save(crawlerPlan);
             }
         }
-        List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
-        List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
-        for (List<ArticleCategory> partition : partitionList) {
-            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
-            String prompt = buildKimiPrompt(partitionTitles);
-            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
-            long now = System.currentTimeMillis();
-            JSONObject obj = null;
-            if (kimiResult.isSuccess()) {
-                try {
-                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                } catch (Exception e) {
-                    log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                }
-            }
-            for (ArticleCategory articleCategory : partition) {
-                articleCategory.setKimiResult(kimiResult.getResponseStr());
-                articleCategory.setUpdateTimestamp(now);
-                if (kimiResult.isSuccess() && Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
-                    articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
-                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-                } else {
-                    articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
-                    articleCategory.setFailReason(kimiResult.getFailReason());
-                }
-                articleCategoryRepository.save(articleCategory);
+    }
+
+    /**
+     * Creates {@link ArticleCategory} rows for produce-plan content that does not have one yet.
+     * <p>
+     * Loads the execution records of {@code producePlanIds}, drops those whose channelContentId
+     * already has an ArticleCategory, looks up the crawler content of the remaining ids and
+     * batch-inserts one row (title, title MD5, crawler plan id, create timestamp) per record
+     * whose crawler content was found. Records without crawler content are skipped.
+     */
+    private void addArticleCategoryByProducePlan() {
+        List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
+        List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
+        if (channelContentIds.isEmpty()) {
+            // Nothing to look up; avoids issuing "IN" queries with an empty id list.
+            return;
+        }
+        // A Set keeps the two "already categorised?" filters below linear instead of quadratic.
+        Set<String> articleCategoryIds = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds).stream()
+                .map(ArticleCategory::getChannelContentId).collect(Collectors.toSet());
+        List<ProduceContentCrawlerVO> list = produceContentList.stream().filter(o -> !articleCategoryIds.contains(o.getChannelContentId())).map(o -> {
+            ProduceContentCrawlerVO item = new ProduceContentCrawlerVO();
+            item.setChannelContentId(o.getChannelContentId());
+            item.setProduceContentId(o.getPlanExeId());
+            return item;
+        }).collect(Collectors.toList());
+        channelContentIds = channelContentIds.stream().filter(o -> !articleCategoryIds.contains(o)).collect(Collectors.toList());
+        if (channelContentIds.isEmpty()) {
+            // Every id is already categorised, so "list" is empty as well.
+            return;
+        }
+        List<CrawlerContent> crawlerContentList = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(channelContentIds);
+        // Keep the first row per channelContentId; without a merge function a duplicate key
+        // makes toMap throw IllegalStateException.
+        Map<String, CrawlerContent> map = crawlerContentList.stream().collect(Collectors.toMap(CrawlerContent::getChannelContentId, Function.identity(), (a, b) -> a));
+        long now = System.currentTimeMillis();
+        List<ArticleCategory> saveList = new ArrayList<>();
+        for (ProduceContentCrawlerVO vo : list) {
+            ArticleCategory item = new ArticleCategory();
+            item.setChannelContentId(vo.getChannelContentId());
+            item.setProduceContentId(vo.getProduceContentId());
+            CrawlerContent crawlerContent = map.get(vo.getChannelContentId());
+            if (Objects.nonNull(crawlerContent)) {
+                String title = crawlerContent.getTitle();
+                item.setCrawlerPlanId(crawlerContent.getCrawlerPlanId());
+                item.setTitle(title);
+                item.setTitleMd5(Md5Util.encoderByMd5(title));
+                item.setCreateTimestamp(now);
+                saveList.add(item);
             }
         }
-
+        if (saveList.isEmpty()) {
+            // NOTE(review): presumably the batch insert cannot take an empty list - confirm against the mapper.
+            return;
+        }
+        longArticleBaseMapper.batchInsertArticleCategory(saveList);
     }
 
     private String buildKimiPrompt(List<String> titleList) {

+ 7 - 4
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -189,11 +189,10 @@ public class RecallService implements ApplicationContextAware {
         Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream().collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
         List<String> produceContentIds = sourceMap.values().stream().map(ArticlePoolPromotionSource::getRootProduceContentId).collect(Collectors.toList());
         // 根据produceContentId查询category
-        List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByProduceContentIdIn(produceContentIds);
+        List<ArticleCategory> articleCategoryList = articleCategoryRepository.findAll();
         Map<String, ArticleCategory> categoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
-        // 冷启层直接用channelContentId查询
-        List<ArticleCategory> coldStartArticleCategoryList = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds);
-        Map<String, ArticleCategory> coldStartCategoryMap = coldStartArticleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+        Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+        Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream().collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
         for (Content content : contentList) {
             ArticlePoolPromotionSource source = sourceMap.get(content.getCrawlerChannelContentId());
             ArticleCategory category = null;
@@ -203,6 +202,10 @@ public class RecallService implements ApplicationContextAware {
             if (Objects.isNull(category)) {
                 category = coldStartCategoryMap.get(content.getCrawlerChannelContentId());
             }
+            if (Objects.isNull(category)) {
+                String titleMd5 = Md5Util.encoderByMd5(content.getTitle());
+                category = titleCategoryMap.get(titleMd5);
+            }
             if (Objects.nonNull(category)) {
                 content.setCategory(Collections.singletonList(category.getCategory()));
                 continue;