Bläddra i källkod

Merge branch 'wyp/1213-articleCategory' of Server/long-article-recommend into wyp/1206-titleHisCache

wangyunpeng 7 månader sedan
förälder
incheckning
25cc5f3772

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/ArticleCategory.java

@@ -39,7 +39,7 @@ public class ArticleCategory {
     private String kimiResult;
 
     @Column(name = "status")
-    private Integer status;
+    private Integer status = 0;
 
     @Column(name = "fail_reason")
     private String failReason;

+ 74 - 5
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -92,6 +92,9 @@ public class ArticleService {
     @Value("${kimiCategoryPrompt:}")
     private String kimiCategoryPrompt;
 
+    @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
+    private Map<String, Map<String, Map<String, String>>> produceConfig;
+
     private final static ExecutorService pool = new CommonThreadPoolExecutor(
             32,
             128,
@@ -380,8 +383,13 @@ public class ArticleService {
 
 
     public void articleCategory() {
+        // Add category-processing tasks based on crawler plans
         addArticleCategoryByCrawlerPlan();
-        addArticleCategoryByProducePlan();
+        // Cold-start tier: add category-processing tasks from produce plans
+        addColdArticleCategoryByProducePlan();
+        // Promotion tier: add category-processing tasks from produce plans
+        addPromotionArticleCategoryByProducePlan();
+        // Call Kimi to classify the content
         dealArticleCategory();
     }
 
@@ -449,7 +457,70 @@ public class ArticleService {
         }
     }
 
-    private void addArticleCategoryByProducePlan() {
+    /**
+     * Builds the category tasks for the configured {@code producePlanIds} and persists them
+     * in a single batch insert. Nothing is written when there are no new tasks.
+     */
+    private void addColdArticleCategoryByProducePlan() {
+        List<ArticleCategory> pendingCategories = addArticleCategoryByProducePlan(producePlanIds);
+        // Guard clause: skip the batch insert entirely when there is nothing to save.
+        if (CollectionUtils.isEmpty(pendingCategories)) {
+            return;
+        }
+        longArticleBaseMapper.batchInsertArticleCategory(pendingCategories);
+    }
+
+    /**
+     * Adds category tasks for content generated by promotion produce plans.
+     * <p>
+     * Plan ids are read from the nested {@code produceConfig} map; blank values, ids already
+     * present in {@code producePlanIds}, and repeated ids are skipped. For every new task a
+     * category is looked up among existing SUCCESS records, in this order: by the promotion
+     * source's root produce content id, by channel content id, then by title MD5. When a
+     * non-blank category is found it is copied (with its Kimi result) and the task is marked
+     * SUCCESS; otherwise the task keeps its initial status so it can be classified later.
+     * All tasks are then saved in one batch insert.
+     */
+    private void addPromotionArticleCategoryByProducePlan() {
+        List<String> articlePromotionProducePlanIds = new ArrayList<>();
+        // Collect the promotion produce-plan ids from the three-level config map.
+        for (Map<String, Map<String, String>> levelTwoConfig : produceConfig.values()) {
+            for (Map<String, String> planIdConfig : levelTwoConfig.values()) {
+                for (String planId : planIdConfig.values()) {
+                    // Skip blanks, cold-start plan ids, and ids that were already collected.
+                    if (StringUtils.hasText(planId) && !producePlanIds.contains(planId)
+                            && !articlePromotionProducePlanIds.contains(planId)) {
+                        articlePromotionProducePlanIds.add(planId);
+                    }
+                }
+            }
+        }
+        // No promotion plans configured: avoid querying with an empty id list.
+        if (articlePromotionProducePlanIds.isEmpty()) {
+            return;
+        }
+        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(articlePromotionProducePlanIds);
+        // Promoted articles: first trace back to the source content's category; only items
+        // without a reusable category are left for Kimi classification.
+        if (CollectionUtils.isNotEmpty(saveList)) {
+            List<String> channelContentIds = saveList.stream().map(ArticleCategory::getChannelContentId)
+                    .collect(Collectors.toList());
+            // Look up the finished promotion sources (they carry the rootProduceContentId).
+            List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
+                    .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
+                            ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
+            // Keep the first entry on duplicate keys; toMap without a merge function would
+            // throw IllegalStateException as soon as two rows share a key.
+            Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
+                    .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId,
+                            Function.identity(), (a, b) -> a));
+            // Index the already classified records by produce content id, channel content id and title MD5.
+            List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+            Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
+            for (ArticleCategory articleCategory : saveList) {
+                ArticlePoolPromotionSource source = sourceMap.get(articleCategory.getChannelContentId());
+                ArticleCategory category = null;
+                // 1) category of the root produce content this article was promoted from
+                if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
+                    category = categoryMap.get(source.getRootProduceContentId());
+                }
+                // 2) category recorded for the same channel content
+                if (Objects.isNull(category)) {
+                    category = coldStartCategoryMap.get(articleCategory.getChannelContentId());
+                }
+                // 3) category recorded for the same title
+                if (Objects.isNull(category)) {
+                    category = titleCategoryMap.get(articleCategory.getTitleMd5());
+                }
+                if (Objects.nonNull(category) && StringUtils.hasText(category.getCategory())) {
+                    articleCategory.setCategory(category.getCategory());
+                    articleCategory.setKimiResult(category.getKimiResult());
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                }
+            }
+            longArticleBaseMapper.batchInsertArticleCategory(saveList);
+        }
+    }
+
+    /**
+     * Get the content that needs to be classified, based on the produce plans.
+     */
+    private List<ArticleCategory> addArticleCategoryByProducePlan(List<String> producePlanIds) {
         List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
         List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
         List<ArticleCategory> articleCategoryList = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds);
@@ -479,9 +550,7 @@ public class ArticleService {
                 saveList.add(item);
             }
         }
-        if (CollectionUtils.isNotEmpty(saveList)) {
-            longArticleBaseMapper.batchInsertArticleCategory(saveList);
-        }
+        return saveList;
     }
 
     private String buildKimiPrompt(List<String> titleList) {

+ 9 - 9
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -413,15 +413,7 @@ public class RecallService implements ApplicationContextAware {
         for (TitleHisCacheParam cacheParam : paramList) {
             Content res = new Content();
             // 设置品类
-            ArticlePoolPromotionSource source = sourceMap.get(cacheParam.getCrawlerChannelContentId());
-            ArticleCategory category = null;
-            if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
-                category = categoryMap.get(source.getRootProduceContentId());
-                PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
-                if (Objects.nonNull(publishContent)) {
-                    res.setRootPublishTimestamp(publishContent.getPublishTimestamp());
-                }
-            }
+            ArticleCategory category = categoryMap.get(cacheParam.getSourceId());
             if (Objects.isNull(category)) {
                 category = coldStartCategoryMap.get(cacheParam.getCrawlerChannelContentId());
             }
@@ -431,6 +423,14 @@ public class RecallService implements ApplicationContextAware {
             if (Objects.nonNull(category)) {
                 res.setCategory(Collections.singletonList(category.getCategory()));
             }
+            // 溯源查找源发布时间
+            ArticlePoolPromotionSource source = sourceMap.get(cacheParam.getCrawlerChannelContentId());
+            if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
+                PublishContent publishContent = publishContentMap.get(source.getRootPublishContentId());
+                if (Objects.nonNull(publishContent)) {
+                    res.setRootPublishTimestamp(publishContent.getPublishTimestamp());
+                }
+            }
             // 设置历史表现
             List<Article> hisArticles = new ArrayList<>();
             Map<Integer, List<Article>> indexArticleMap = map.get(cacheParam.getTitle());

+ 3 - 2
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -87,11 +87,12 @@
 
     <insert id="batchInsertArticleCategory">
         INSERT INTO article_category
-        (produce_content_id, channel_content_id, crawler_plan_id, title, title_md5, create_timestamp)
+        (produce_content_id, channel_content_id, crawler_plan_id, title, title_md5, category,
+         kimi_result, status, create_timestamp)
         VALUES
         <foreach collection="list" item="item" separator=",">
             (#{item.produceContentId}, #{item.channelContentId}, #{item.crawlerPlanId}, #{item.title}, #{item.titleMd5},
-             #{item.createTimestamp})
+            #{item.category}, #{item.kimiResult}, #{item.status}, #{item.createTimestamp})
         </foreach>
     </insert>