소스 검색

Merge branch 'wyp/1225-videoPoolCategory' of Server/long-article-recommend into master

wangyunpeng 6 달 전
부모
커밋
58849671c7

+ 24 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/ArticleCategoryMapper.java

@@ -0,0 +1,24 @@
+package com.tzld.longarticle.recommend.server.mapper.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
+import com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO;
+import org.apache.ibatis.annotations.Mapper;
+
+import java.util.List;
+
+/**
+ * MyBatis mapper holding the article-category statements that this commit moves out of
+ * {@code LongArticleBaseMapper}. The SQL bound to each method lives outside this file, so the
+ * notes below describe signatures and visible call sites only.
+ */
+@Mapper
+public interface ArticleCategoryMapper {
+
+    /** Batch inserts the given rows; visible callers skip the call when the list is empty or return early. */
+    void batchInsertArticleCategory(List<ArticleCategory> list);
+
+    /** Count used as the page total by {@code ArticleCategoryService#articleCategoryList}; presumably filtered by title — confirm in the SQL. */
+    Integer articleCategoryCount(String title);
+
+    /** Page query; the visible caller passes {@code offset = (pageNum - 1) * pageSize}. */
+    List<ArticleCategoryListVO> articleCategoryList(String title, Integer offset, Integer pageSize);
+
+    /** Presumably sets the category of the rows matching {@code title} — confirm in the SQL. */
+    void updateArticleCategory(String title, String category);
+
+    /** Called right after {@link #updateArticleCategory} by the service; presumably mirrors the category into the datastat score data — confirm in the SQL. */
+    void updateDatastatScoreCategory(String title, String category);
+
+    /** Returns the video-pool sources the service turns into new article-category rows; the selection criteria are defined in the SQL. */
+    List<PublishSingleVideoSource> getVideoPoolArticleCategoryDealList();
+}

+ 1 - 12
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -3,10 +3,8 @@ package com.tzld.longarticle.recommend.server.mapper.longArticle;
 import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
 import com.tzld.longarticle.recommend.server.model.dto.LongArticlesCrawlerVideos;
 import com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesText;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.*;
 import com.tzld.longarticle.recommend.server.model.param.ArticleVideoPoolSourceParam;
-import com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO;
 import org.apache.ibatis.annotations.Mapper;
 
 import java.util.List;
@@ -30,8 +28,6 @@ public interface LongArticleBaseMapper {
 
     void batchInsertDatastatScore(List<DatastatScore> list);
 
-    void batchInsertArticleCategory(List<ArticleCategory> list);
-
     List<DatastatSortStrategy> getArticlePromotion(Integer viewCount, Double viewCountRate,
                                                    Integer fans, String dateStr, List<Integer> positions);
 
@@ -71,15 +67,8 @@ public interface LongArticleBaseMapper {
 
     List<String> getExistsOssPath();
 
-    Integer articleCategoryCount(String title);
-
-    List<ArticleCategoryListVO> articleCategoryList(String title, Integer offset, Integer pageSize);
-
-    void updateArticleCategory(String title, String category);
-
-    void updateDatastatScoreCategory(String title, String category);
-
     void updateVideoPoolContentBad(String contentTraceId);
 
     void batchInsertArticleReMatchRecord(List<ArticleReMatchRecord> list);
+
 }

+ 5 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -32,6 +32,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.GetOffVideoCrawl
 import com.tzld.longarticle.recommend.server.repository.crawler.LongArticlesVideoRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.*;
 import com.tzld.longarticle.recommend.server.repository.model.PushMessageCallbackExample;
+import com.tzld.longarticle.recommend.server.service.recommend.ArticleCategoryService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticlePromotionService;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
 import com.tzld.longarticle.recommend.server.service.recommend.recall.RecallService;
@@ -90,6 +91,8 @@ public class XxlJobService {
     @Autowired
     private ArticleService articleService;
     @Autowired
+    private ArticleCategoryService articleCategoryService;
+    @Autowired
     private ArticlePromotionService articlePromotionService;
     @Autowired
     private ArticleTitleHisCacheRepository articleTitleHisCacheRepository;
@@ -647,13 +650,13 @@ public class XxlJobService {
 
     @XxlJob("articleCategoryJob")
     public ReturnT<String> articleCategoryJob(String param) {
-        articleService.articleCategory();
+        articleCategoryService.articleCategory();
         return ReturnT.SUCCESS;
     }
 
     @XxlJob("articleCategoryJobRetry")
     public ReturnT<String> articleCategoryJobRetry(String param) {
-        articleService.articleCategoryJobRetry();
+        articleCategoryService.articleCategoryJobRetry();
         return ReturnT.SUCCESS;
     }
 

+ 292 - 7
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleCategoryService.java

@@ -1,28 +1,77 @@
 package com.tzld.longarticle.recommend.server.service.recommend;
 
-import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.alibaba.fastjson.JSONObject;
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.google.common.collect.Lists;
+import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
+import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.ArticleCategoryMapper;
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
+import com.tzld.longarticle.recommend.server.model.dto.kimi.KimiResult;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCrawlerPlan;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
 import com.tzld.longarticle.recommend.server.model.param.ArticleCategoryListParam;
 import com.tzld.longarticle.recommend.server.model.param.ArticleCategoryUpdateParam;
 import com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO;
+import com.tzld.longarticle.recommend.server.model.vo.ProduceContentCrawlerVO;
+import com.tzld.longarticle.recommend.server.remote.CrawlerContentByPlanService;
+import com.tzld.longarticle.recommend.server.remote.KimiApiService;
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCrawlerPlanRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
+import com.tzld.longarticle.recommend.server.util.DateUtils;
+import com.tzld.longarticle.recommend.server.util.Md5Util;
 import com.tzld.longarticle.recommend.server.util.page.Page;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
+import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
+import org.springframework.util.StringUtils;
 
-import java.util.List;
+import java.util.*;
+import java.util.function.Function;
+import java.util.stream.Collectors;
 
 @Service
 @Slf4j
 public class ArticleCategoryService {
 
     @Autowired
-    private LongArticleBaseMapper longArticleBaseMapper;
+    private ArticleCategoryMapper articleCategoryMapper;
+    @Autowired
+    ArticleCategoryRepository articleCategoryRepository;
+    @Autowired
+    ArticleCrawlerPlanRepository articleCrawlerPlanRepository;
+    @Autowired
+    ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
+    @Autowired
+    CrawlerContentByPlanService crawlerContentByPlanService;
+    @Autowired
+    AigcBaseMapper aigcBaseMapper;
+    @Autowired
+    KimiApiService kimiApiService;
+
+    // Produce-plan ids read from the config key "cold.pool.produce.planId"; the JSON array literal
+    // below is the default. Used for the cold-start layer and to exclude those ids from the promotion plans.
+    // NOTE(review): this injected field is static — presumably Apollo's reflective injection still
+    // populates it, but confirm; an instance field would be the conventional choice.
+    @ApolloJsonValue("${cold.pool.produce.planId:[\"20240802021606053813696\", \"20240802080355355308981\",\n" +
+            "\"20240805154433785506170\", \"20240805154359027876170\", \"20241024100016206421084\", " +
+            "\"20241030070010871546586\"]}")
+    private static List<String> producePlanIds;
+    // Prompt prefix placed before the title list in buildKimiPrompt; defaults to an empty string.
+    @Value("${kimiCategoryPrompt:}")
+    private String kimiCategoryPrompt;
+
+    // Three-level config map (default: empty object); the innermost values are read as promotion
+    // produce-plan ids in addPromotionArticleCategoryByProducePlan.
+    @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
+    private Map<String, Map<String, Map<String, String>>> produceConfig;
 
 
     public Page<ArticleCategoryListVO> articleCategoryList(ArticleCategoryListParam param) {
         int offset = (param.getPageNum() - 1) * param.getPageSize();
-        int count = longArticleBaseMapper.articleCategoryCount(param.getTitle());
-        List<ArticleCategoryListVO> list = longArticleBaseMapper.articleCategoryList(param.getTitle(), offset, param.getPageSize());
+        int count = articleCategoryMapper.articleCategoryCount(param.getTitle());
+        List<ArticleCategoryListVO> list = articleCategoryMapper.articleCategoryList(param.getTitle(), offset, param.getPageSize());
         Page<ArticleCategoryListVO> page = new Page<>(param.getPageNum(), param.getPageSize());
         page.setTotalSize(count);
         page.setObjs(list);
@@ -30,7 +79,243 @@ public class ArticleCategoryService {
     }
 
     public void articleCategoryUpdate(ArticleCategoryUpdateParam param) {
-        longArticleBaseMapper.updateArticleCategory(param.getTitle(), param.getCategory());
-        longArticleBaseMapper.updateDatastatScoreCategory(param.getTitle(), param.getCategory());
+        articleCategoryMapper.updateArticleCategory(param.getTitle(), param.getCategory());
+        articleCategoryMapper.updateDatastatScoreCategory(param.getTitle(), param.getCategory());
+    }
+
+    /**
+     * Entry point of the category job: queues new article-category rows from four sources, then
+     * classifies the waiting rows through Kimi. The steps run strictly in the order listed.
+     */
+    public void articleCategory() {
+        // Add category-processing tasks based on crawler plans
+        addArticleCategoryByCrawlerPlan();
+        // Cold-start layer: add category-processing tasks from produce plans
+        addColdArticleCategoryByProducePlan();
+        // Promotion: add category-processing tasks from produce plans
+        addPromotionArticleCategoryByProducePlan();
+        // Video content pool: add category-processing tasks
+        addVideoPoolArticleCategory();
+        // Call Kimi to classify the content
+        dealArticleCategory();
+    }
+
+    /**
+     * Classifies every WAITING article-category row through Kimi, 20 rows per request.
+     * <p>
+     * The distinct titles of a partition are sent in one prompt and the reply content is parsed as a
+     * JSON object keyed by title. A row becomes SUCCESS when the parsed object contains its title;
+     * otherwise (request failed, reply not parseable, or title missing) it becomes FAIL and keeps the
+     * fail reason reported by the Kimi result. Every row is saved with the raw response string.
+     */
+    private void dealArticleCategory() {
+        List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
+        List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
+        for (List<ArticleCategory> partition : partitionList) {
+            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
+            String prompt = buildKimiPrompt(partitionTitles);
+            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
+            long now = System.currentTimeMillis();
+            JSONObject obj = null;
+            if (kimiResult.isSuccess()) {
+                try {
+                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
+                } catch (Exception e) {
+                    // Log the raw response string rather than re-walking the response chain: that
+                    // chain may be what just threw, and a second failure here would abort the job.
+                    log.error("parse kimi category response failed, response: {}", kimiResult.getResponseStr(), e);
+                }
+            }
+            for (ArticleCategory articleCategory : partition) {
+                articleCategory.setKimiResult(kimiResult.getResponseStr());
+                articleCategory.setUpdateTimestamp(now);
+                if (kimiResult.isSuccess() && Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
+                    articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                } else {
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
+                    articleCategory.setFailReason(kimiResult.getFailReason());
+                }
+                articleCategoryRepository.save(articleCategory);
+            }
+        }
+    }
+
+    /**
+     * Queues category-classification rows for content fetched by crawler plans whose status is
+     * {@code StatusEnum.ZERO}.
+     * <p>
+     * For each plan, the content returned by {@code crawlerContentByPlanService} is reduced to the
+     * produce-content ids that have no {@link ArticleCategory} row yet, and those are batch inserted.
+     * The plan is then switched to {@code StatusEnum.ONE} once the date encoded in the first eight
+     * characters of its id (yyyyMMdd) is more than five days old.
+     */
+    private void addArticleCategoryByCrawlerPlan() {
+        List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
+        for (ArticleCrawlerPlan crawlerPlan : articleCrawlerPlanList) {
+            List<ProduceContentCrawlerVO> list = crawlerContentByPlanService.getCrawlerContentByPlan(crawlerPlan.getCrawlerPlanId(), producePlanIds);
+            List<String> produceContentIds = list.stream().map(ProduceContentCrawlerVO::getProduceContentId).collect(Collectors.toList());
+            // Set lookup keeps the "already queued" filter linear instead of List.contains per item.
+            Set<String> existsIds = articleCategoryRepository.getByProduceContentIdIn(produceContentIds).stream()
+                    .map(ArticleCategory::getProduceContentId)
+                    .collect(Collectors.toSet());
+            long now = System.currentTimeMillis();
+            List<ArticleCategory> saveList = new ArrayList<>();
+            for (ProduceContentCrawlerVO vo : list) {
+                if (existsIds.contains(vo.getProduceContentId())) {
+                    continue;
+                }
+                ArticleCategory item = new ArticleCategory();
+                item.setCrawlerPlanId(crawlerPlan.getCrawlerPlanId());
+                item.setChannelContentId(vo.getChannelContentId());
+                item.setProduceContentId(vo.getProduceContentId());
+                item.setTitle(vo.getTitle());
+                item.setTitleMd5(Md5Util.encoderByMd5(vo.getTitle()));
+                item.setCreateTimestamp(now);
+                saveList.add(item);
+            }
+            if (CollectionUtils.isNotEmpty(saveList)) {
+                articleCategoryMapper.batchInsertArticleCategory(saveList);
+            }
+            // Mark crawler plans older than 5 days as processed.
+            // The previous check subtracted 86400 * 5 (seconds) from a millisecond clock value, so the
+            // cutoff was not five days; comparing calendar values avoids any timestamp-unit mismatch.
+            String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 8);
+            java.time.LocalDate planDate = java.time.LocalDate.parse(dateStr, java.time.format.DateTimeFormatter.BASIC_ISO_DATE);
+            if (planDate.atStartOfDay().isBefore(java.time.LocalDateTime.now().minusDays(5))) {
+                crawlerPlan.setStatus(StatusEnum.ONE.getCode());
+                crawlerPlan.setUpdateTimestamp(now);
+                articleCrawlerPlanRepository.save(crawlerPlan);
+            }
+        }
+    }
+
+    /**
+     * Queues category rows for the cold-start produce plans ({@code producePlanIds}); nothing is
+     * inserted when no new content is found.
+     */
+    private void addColdArticleCategoryByProducePlan() {
+        List<ArticleCategory> coldStartRows = addArticleCategoryByProducePlan(producePlanIds);
+        if (CollectionUtils.isEmpty(coldStartRows)) {
+            return;
+        }
+        articleCategoryMapper.batchInsertArticleCategory(coldStartRows);
+    }
+
+    /**
+     * Queues category rows for the promotion produce plans and, where possible, reuses a category
+     * that was already resolved instead of leaving the row for Kimi.
+     * <p>
+     * Plan ids are the non-blank innermost values of {@code produceConfig} that are not cold-start
+     * plan ids. For each new row an existing SUCCESS category is looked up in this order: the
+     * promotion source's root produce-content id, the same channel-content id, then the same title
+     * MD5. Rows with a match are inserted as SUCCESS; the others are inserted without a category.
+     */
+    private void addPromotionArticleCategoryByProducePlan() {
+        List<String> articlePromotionProducePlanIds = new ArrayList<>();
+        // Collect the promotion produce-plan ids
+        for (Map.Entry<String, Map<String, Map<String, String>>> oneEntry : produceConfig.entrySet()) {
+            for (Map.Entry<String, Map<String, String>> twoEntry : oneEntry.getValue().entrySet()) {
+                twoEntry.getValue().forEach((key, value) -> {
+                    if (StringUtils.hasText(value) && !producePlanIds.contains(value)) {
+                        articlePromotionProducePlanIds.add(value);
+                    }
+                });
+            }
+        }
+        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(articlePromotionProducePlanIds);
+        // For promoted articles, first trace back to the source content's category;
+        // only rows without a match are left for Kimi classification
+        if (CollectionUtils.isNotEmpty(saveList)) {
+            List<String> channelContentIds = saveList.stream().map(ArticleCategory::getChannelContentId)
+                    .collect(Collectors.toList());
+            // Look up the promotion sources (rootProduceContentId)
+            List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
+                    .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
+                            ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
+            // Keep the first entry on duplicate keys: without a merge function toMap throws
+            // IllegalStateException and aborts the job. This matches the two maps built below.
+            Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
+                    .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity(), (a, b) -> a));
+            // Look up category by produceContentId
+            List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+            Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
+            Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
+                    .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
+            for (ArticleCategory articleCategory : saveList) {
+                ArticlePoolPromotionSource source = sourceMap.get(articleCategory.getChannelContentId());
+                ArticleCategory category = null;
+                if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
+                    category = categoryMap.get(source.getRootProduceContentId());
+                }
+                if (Objects.isNull(category)) {
+                    category = coldStartCategoryMap.get(articleCategory.getChannelContentId());
+                }
+                if (Objects.isNull(category)) {
+                    category = titleCategoryMap.get(articleCategory.getTitleMd5());
+                }
+                if (Objects.nonNull(category) && StringUtils.hasText(category.getCategory())) {
+                    articleCategory.setCategory(category.getCategory());
+                    articleCategory.setKimiResult(category.getKimiResult());
+                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                }
+            }
+            articleCategoryMapper.batchInsertArticleCategory(saveList);
+        }
+    }
+
+    /**
+     * Queues one category row per video-pool source returned by the mapper, using the source's
+     * content trace id as the produce-content id. Does nothing when the mapper returns no sources.
+     */
+    private void addVideoPoolArticleCategory() {
+        // Fetch all video-pool content awaiting processing
+        List<PublishSingleVideoSource> pendingSources = articleCategoryMapper.getVideoPoolArticleCategoryDealList();
+        if (CollectionUtils.isEmpty(pendingSources)) {
+            return;
+        }
+        long createdAt = System.currentTimeMillis();
+        List<ArticleCategory> rows = pendingSources.stream().map(videoSource -> {
+            ArticleCategory row = new ArticleCategory();
+            row.setProduceContentId(videoSource.getContentTraceId());
+            row.setTitle(videoSource.getArticleTitle());
+            row.setTitleMd5(Md5Util.encoderByMd5(videoSource.getArticleTitle()));
+            row.setCreateTimestamp(createdAt);
+            return row;
+        }).collect(Collectors.toList());
+        articleCategoryMapper.batchInsertArticleCategory(rows);
+    }
+
+    /**
+     * Collects the content of the given produce plans that still needs classification.
+     * <p>
+     * Execution records of the plans are reduced to the channel-content ids that have no
+     * {@link ArticleCategory} row yet; title and crawler-plan id are then taken from the matching
+     * crawler content. Records without crawler content are dropped. Nothing is persisted here.
+     *
+     * @param producePlanIds produce-plan ids to scan; {@code null} or empty yields an empty result
+     * @return new, unsaved rows in execution-record order; empty when there is nothing to add
+     */
+    private List<ArticleCategory> addArticleCategoryByProducePlan(List<String> producePlanIds) {
+        // No plans means no content: skip the mapper call rather than query with an empty id list.
+        if (CollectionUtils.isEmpty(producePlanIds)) {
+            return Collections.emptyList();
+        }
+        List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
+        List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
+        // Set lookup keeps the "already queued" filter linear instead of List.contains per record.
+        Set<String> articleCategoryIds = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds).stream()
+                .map(ArticleCategory::getChannelContentId)
+                .collect(Collectors.toSet());
+        channelContentIds = channelContentIds.stream().filter(o -> !articleCategoryIds.contains(o)).collect(Collectors.toList());
+        if (CollectionUtils.isEmpty(channelContentIds)) {
+            return Collections.emptyList();
+        }
+        List<CrawlerContent> crawlerContentList = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(channelContentIds);
+        // Keep the first entry on duplicate channel-content ids; without a merge function toMap
+        // throws IllegalStateException.
+        Map<String, CrawlerContent> map = crawlerContentList.stream()
+                .collect(Collectors.toMap(CrawlerContent::getChannelContentId, Function.identity(), (a, b) -> a));
+        long now = System.currentTimeMillis();
+        List<ArticleCategory> saveList = new ArrayList<>();
+        for (ProducePlanExeRecord record : produceContentList) {
+            if (articleCategoryIds.contains(record.getChannelContentId())) {
+                continue;
+            }
+            CrawlerContent crawlerContent = map.get(record.getChannelContentId());
+            if (Objects.isNull(crawlerContent)) {
+                continue;
+            }
+            String title = crawlerContent.getTitle();
+            ArticleCategory item = new ArticleCategory();
+            item.setChannelContentId(record.getChannelContentId());
+            item.setProduceContentId(record.getPlanExeId());
+            item.setCrawlerPlanId(crawlerContent.getCrawlerPlanId());
+            item.setTitle(title);
+            item.setTitleMd5(Md5Util.encoderByMd5(title));
+            item.setCreateTimestamp(now);
+            saveList.add(item);
+        }
+        return saveList;
+    }
+
+    /**
+     * Builds the Kimi prompt: the configured {@code kimiCategoryPrompt}, a newline, then every
+     * title on its own newline-terminated line.
+     *
+     * @param titleList titles to classify, appended in the given order
+     * @return the assembled prompt text
+     */
+    private String buildKimiPrompt(List<String> titleList) {
+        StringBuilder text = new StringBuilder(kimiCategoryPrompt).append("\n");
+        titleList.forEach(title -> text.append(title).append("\n"));
+        return text.toString();
+    }
+
+    /**
+     * Retries classification for FAIL rows whose retry count is below 3, one title per Kimi request.
+     * <p>
+     * Each attempt stores the raw response and increments the retry count. The row becomes SUCCESS
+     * with the value of the first key of the parsed reply; it stays FAIL when the request failed,
+     * the reply could not be parsed, or the parsed object is empty.
+     */
+    public void articleCategoryJobRetry() {
+        List<ArticleCategory> dealList = articleCategoryRepository.getByStatusAndRetryTimesLessThan(ArticleCategoryStatusEnum.FAIL.getCode(), 3);
+        for (ArticleCategory articleCategory : dealList) {
+            List<String> partitionTitles = Collections.singletonList(articleCategory.getTitle());
+            String prompt = buildKimiPrompt(partitionTitles);
+            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
+            long now = System.currentTimeMillis();
+            JSONObject obj = null;
+            if (kimiResult.isSuccess()) {
+                try {
+                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
+                } catch (Exception e) {
+                    // Log the raw response string rather than re-walking the response chain: that
+                    // chain may be what just threw, and a second failure here would abort the job.
+                    log.error("parse kimi category response failed, response: {}", kimiResult.getResponseStr(), e);
+                }
+            }
+            articleCategory.setKimiResult(kimiResult.getResponseStr());
+            articleCategory.setUpdateTimestamp(now);
+            articleCategory.setRetryTimes(articleCategory.getRetryTimes() + 1);
+            // An empty JSON object carries no category; treat it as a failed attempt instead of
+            // indexing into an empty key list.
+            if (kimiResult.isSuccess() && Objects.nonNull(obj) && !obj.isEmpty()) {
+                String category = obj.getString(obj.keySet().iterator().next());
+                articleCategory.setCategory(category);
+                articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
+                articleCategory.setFailReason(null);
+            } else {
+                articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
+                articleCategory.setFailReason(kimiResult.getFailReason());
+            }
+            articleCategoryRepository.save(articleCategory);
+        }
     }
 }

+ 4 - 257
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -1,54 +1,39 @@
 package com.tzld.longarticle.recommend.server.service.recommend;
 
-import com.alibaba.fastjson.JSONObject;
-import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
-import com.google.common.collect.Lists;
 import com.google.common.util.concurrent.ThreadFactoryBuilder;
 import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
 import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishContentTypeEnum;
-import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
 import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
-import com.tzld.longarticle.recommend.server.model.dto.kimi.KimiResult;
-import com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
-import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCrawlerPlan;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
 import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
-import com.tzld.longarticle.recommend.server.model.vo.ProduceContentCrawlerVO;
 import com.tzld.longarticle.recommend.server.model.vo.RootPublishContentVO;
-import com.tzld.longarticle.recommend.server.remote.CrawlerContentByPlanService;
-import com.tzld.longarticle.recommend.server.remote.KimiApiService;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
 import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentOutputRepository;
-import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
-import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCrawlerPlanRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
-import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
-import com.tzld.longarticle.recommend.server.util.Md5Util;
 import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.BeanUtils;
 import org.springframework.beans.factory.annotation.Autowired;
-import org.springframework.beans.factory.annotation.Value;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
 
-import java.util.*;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+import java.util.Objects;
 import java.util.concurrent.*;
-import java.util.function.Function;
 import java.util.stream.Collectors;
 
 /**
@@ -58,10 +43,6 @@ import java.util.stream.Collectors;
 @Slf4j
 public class ArticleService {
 
-    @Autowired
-    AccountIndexAvgViewCountService accountIndexAvgViewCountService;
-    @Autowired
-    AccountAvgInfoRepository accountAvgInfoRepository;
     @Autowired
     ArticleRepository articleRepository;
     @Autowired
@@ -76,24 +57,6 @@ public class ArticleService {
     LongArticleBaseMapper longArticleBaseMapper;
     @Autowired
     ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
-    @Autowired
-    ArticleCrawlerPlanRepository articleCrawlerPlanRepository;
-    @Autowired
-    ArticleCategoryRepository articleCategoryRepository;
-    @Autowired
-    CrawlerContentByPlanService crawlerContentByPlanService;
-    @Autowired
-    KimiApiService kimiApiService;
-
-    @ApolloJsonValue("${cold.pool.produce.planId:[\"20240802021606053813696\", \"20240802080355355308981\",\n" +
-            "\"20240805154433785506170\", \"20240805154359027876170\", \"20241024100016206421084\", " +
-            "\"20241030070010871546586\"]}")
-    private static List<String> producePlanIds;
-    @Value("${kimiCategoryPrompt:}")
-    private String kimiCategoryPrompt;
-
-    @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
-    private Map<String, Map<String, Map<String, String>>> produceConfig;
 
     private final static ExecutorService pool = new CommonThreadPoolExecutor(
             32,
@@ -381,222 +344,6 @@ public class ArticleService {
         }
     }
 
-
-    public void articleCategory() {
-        // 根据抓取计划 添加品类处理任务
-        addArticleCategoryByCrawlerPlan();
-        // 冷启层 生成计划 添加品类处理任务
-        addColdArticleCategoryByProducePlan();
-        // 晋级 生成计划 添加品类处理任务
-        addPromotionArticleCategoryByProducePlan();
-        // 调用kimi进行内容分类
-        dealArticleCategory();
-    }
-
-    private void dealArticleCategory() {
-        List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
-        List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
-        for (List<ArticleCategory> partition : partitionList) {
-            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
-            String prompt = buildKimiPrompt(partitionTitles);
-            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
-            long now = System.currentTimeMillis();
-            JSONObject obj = null;
-            if (kimiResult.isSuccess()) {
-                try {
-                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                } catch (Exception e) {
-                    log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                }
-            }
-            for (ArticleCategory articleCategory : partition) {
-                articleCategory.setKimiResult(kimiResult.getResponseStr());
-                articleCategory.setUpdateTimestamp(now);
-                if (kimiResult.isSuccess() && Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
-                    articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
-                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-                } else {
-                    articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
-                    articleCategory.setFailReason(kimiResult.getFailReason());
-                }
-                articleCategoryRepository.save(articleCategory);
-            }
-        }
-    }
-
-    private void addArticleCategoryByCrawlerPlan() {
-        List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
-        for (ArticleCrawlerPlan crawlerPlan : articleCrawlerPlanList) {
-            List<ProduceContentCrawlerVO> list = crawlerContentByPlanService.getCrawlerContentByPlan(crawlerPlan.getCrawlerPlanId(), producePlanIds);
-            List<String> produceContentIds = list.stream().map(ProduceContentCrawlerVO::getProduceContentId).collect(Collectors.toList());
-            List<ArticleCategory> exists = articleCategoryRepository.getByProduceContentIdIn(produceContentIds);
-            List<String> existsIds = exists.stream().map(ArticleCategory::getProduceContentId).collect(Collectors.toList());
-            list = list.stream().filter(o -> !existsIds.contains(o.getProduceContentId())).collect(Collectors.toList());
-            long now = System.currentTimeMillis();
-            List<ArticleCategory> saveList = new ArrayList<>();
-            for (ProduceContentCrawlerVO vo : list) {
-                ArticleCategory item = new ArticleCategory();
-                item.setCrawlerPlanId(crawlerPlan.getCrawlerPlanId());
-                item.setChannelContentId(vo.getChannelContentId());
-                item.setProduceContentId(vo.getProduceContentId());
-                item.setTitle(vo.getTitle());
-                item.setTitleMd5(Md5Util.encoderByMd5(vo.getTitle()));
-                item.setCreateTimestamp(now);
-                saveList.add(item);
-            }
-            if (CollectionUtils.isNotEmpty(saveList)) {
-                longArticleBaseMapper.batchInsertArticleCategory(saveList);
-            }
-            // 抓取计划超过5天设置为已处理
-            String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 8);
-            if (DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd") < now - 86400 * 5) {
-                crawlerPlan.setStatus(StatusEnum.ONE.getCode());
-                crawlerPlan.setUpdateTimestamp(now);
-                articleCrawlerPlanRepository.save(crawlerPlan);
-            }
-        }
-    }
-
-    private void addColdArticleCategoryByProducePlan() {
-        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(producePlanIds);
-        if (CollectionUtils.isNotEmpty(saveList)) {
-            longArticleBaseMapper.batchInsertArticleCategory(saveList);
-        }
-    }
-
-    private void addPromotionArticleCategoryByProducePlan() {
-        List<String> articlePromotionProducePlanIds = new ArrayList<>();
-        // 获取晋级生成计划Id
-        for (Map.Entry<String, Map<String, Map<String, String>>> oneEntry : produceConfig.entrySet()) {
-            for (Map.Entry<String, Map<String, String>> twoEntry : oneEntry.getValue().entrySet()) {
-                twoEntry.getValue().forEach((key, value) -> {
-                    if (StringUtils.hasText(value) && !producePlanIds.contains(value)) {
-                        articlePromotionProducePlanIds.add(value);
-                    }
-                });
-            }
-        }
-        List<ArticleCategory> saveList = addArticleCategoryByProducePlan(articlePromotionProducePlanIds);
-        // 已晋级文章 先溯源查找源内容品类,查询不到再用kimi进行分类
-        if (CollectionUtils.isNotEmpty(saveList)) {
-            List<String> channelContentIds = saveList.stream().map(ArticleCategory::getChannelContentId)
-                    .collect(Collectors.toList());
-            // 查询晋升rootProduceContentId
-            List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
-                    .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
-                            ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
-            Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
-                    .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
-            // 根据produceContentId查询category
-            List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-            Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
-                    .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
-            Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
-                    .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
-            Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
-                    .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
-            for (ArticleCategory articleCategory : saveList) {
-                ArticlePoolPromotionSource source = sourceMap.get(articleCategory.getChannelContentId());
-                ArticleCategory category = null;
-                if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
-                    category = categoryMap.get(source.getRootProduceContentId());
-                }
-                if (Objects.isNull(category)) {
-                    category = coldStartCategoryMap.get(articleCategory.getChannelContentId());
-                }
-                if (Objects.isNull(category)) {
-                    category = titleCategoryMap.get(articleCategory.getTitleMd5());
-                }
-                if (Objects.nonNull(category) && StringUtils.hasText(category.getCategory())) {
-                    articleCategory.setCategory(category.getCategory());
-                    articleCategory.setKimiResult(category.getKimiResult());
-                    articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-                }
-            }
-            longArticleBaseMapper.batchInsertArticleCategory(saveList);
-        }
-    }
-
-    /**
-     * 根据生成计划获取需要进行分类内容
-     */
-    private List<ArticleCategory> addArticleCategoryByProducePlan(List<String> producePlanIds) {
-        List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
-        List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
-        List<ArticleCategory> articleCategoryList = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds);
-        List<String> articleCategoryIds = articleCategoryList.stream().map(ArticleCategory::getChannelContentId).collect(Collectors.toList());
-        List<ProduceContentCrawlerVO> list = produceContentList.stream().filter(o -> !articleCategoryIds.contains(o.getChannelContentId())).map(o -> {
-            ProduceContentCrawlerVO item = new ProduceContentCrawlerVO();
-            item.setChannelContentId(o.getChannelContentId());
-            item.setProduceContentId(o.getPlanExeId());
-            return item;
-        }).collect(Collectors.toList());
-        channelContentIds = channelContentIds.stream().filter(o -> !articleCategoryIds.contains(o)).collect(Collectors.toList());
-        if (CollectionUtils.isEmpty(channelContentIds)) {
-            return Collections.emptyList();
-        }
-        List<CrawlerContent> crawlerContentList = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(channelContentIds);
-        Map<String, CrawlerContent> map = crawlerContentList.stream().collect(Collectors.toMap(CrawlerContent::getChannelContentId, Function.identity()));
-        long now = System.currentTimeMillis();
-        List<ArticleCategory> saveList = new ArrayList<>();
-        for (ProduceContentCrawlerVO vo : list) {
-            ArticleCategory item = new ArticleCategory();
-            item.setChannelContentId(vo.getChannelContentId());
-            item.setProduceContentId(vo.getProduceContentId());
-            CrawlerContent crawlerContent = map.get(vo.getChannelContentId());
-            if (Objects.nonNull(crawlerContent)) {
-                String title = crawlerContent.getTitle();
-                item.setCrawlerPlanId(crawlerContent.getCrawlerPlanId());
-                item.setTitle(title);
-                item.setTitleMd5(Md5Util.encoderByMd5(title));
-                item.setCreateTimestamp(now);
-                saveList.add(item);
-            }
-        }
-        return saveList;
-    }
-
-    private String buildKimiPrompt(List<String> titleList) {
-        StringBuilder prompt = new StringBuilder(kimiCategoryPrompt);
-        prompt.append("\n");
-        for (String title : titleList) {
-            prompt.append(title).append("\n");
-        }
-        return prompt.toString();
-    }
-
-    public void articleCategoryJobRetry() {
-        List<ArticleCategory> dealList = articleCategoryRepository.getByStatusAndRetryTimesLessThan(ArticleCategoryStatusEnum.FAIL.getCode(), 3);
-        for (ArticleCategory articleCategory : dealList) {
-            List<String> partitionTitles = Collections.singletonList(articleCategory.getTitle());
-            String prompt = buildKimiPrompt(partitionTitles);
-            KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
-            long now = System.currentTimeMillis();
-            JSONObject obj = null;
-            if (kimiResult.isSuccess()) {
-                try {
-                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                } catch (Exception e) {
-                    log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
-                }
-            }
-            articleCategory.setKimiResult(kimiResult.getResponseStr());
-            articleCategory.setUpdateTimestamp(now);
-            articleCategory.setRetryTimes(articleCategory.getRetryTimes() + 1);
-            if (kimiResult.isSuccess() && Objects.nonNull(obj)) {
-                List<String> keys = new ArrayList<>(obj.keySet());
-                String category = obj.getString(keys.get(0));
-                articleCategory.setCategory(category);
-                articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
-                articleCategory.setFailReason(null);
-            } else {
-                articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
-                articleCategory.setFailReason(kimiResult.getFailReason());
-            }
-            articleCategoryRepository.save(articleCategory);
-        }
-    }
-
     public String getPublishContentByWxSn(String wxSn) {
         Article article = articleRepository.getByWxSn(wxSn);
         PublishAccount publishAccount = publishAccountRepository.getByGhId(article.getGhId());

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -420,6 +420,7 @@ public class RecallService implements ApplicationContextAware {
         Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
                 .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
         Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
+                .filter(o -> Objects.nonNull(o.getChannelContentId()))
                 .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
         Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
                 .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));

+ 62 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/ArticleCategoryMapper.xml

@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!DOCTYPE mapper PUBLIC "-//mybatis.org//DTD Mapper 3.0//EN" "http://mybatis.org/dtd/mybatis-3-mapper.dtd">
+<mapper namespace="com.tzld.longarticle.recommend.server.mapper.longArticle.ArticleCategoryMapper">
+
+    <!-- Multi-row insert of the passed list; an empty list leaves VALUES with no tuples (invalid SQL). -->
+    <insert id="batchInsertArticleCategory">
+        INSERT INTO article_category
+        (produce_content_id, channel_content_id, crawler_plan_id, title, title_md5, category, kimi_result, status, create_timestamp)
+        VALUES
+        <foreach collection="list" item="row" separator=",">
+            (#{row.produceContentId}, #{row.channelContentId}, #{row.crawlerPlanId}, #{row.title}, #{row.titleMd5},
+            #{row.category}, #{row.kimiResult}, #{row.status}, #{row.createTimestamp})
+        </foreach>
+    </insert>
+
+    <!-- One page of article_category rows, optionally filtered by title substring. NOTE(review): no ORDER BY, page order is unspecified. -->
+    <select id="articleCategoryList"
+            resultType="com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO">
+        select produce_content_id, title, category from article_category
+        <where>
+            <if test="title != null and title != ''">
+                title like concat('%', #{title}, '%')
+            </if>
+        </where>
+        limit #{offset}, #{pageSize}
+    </select>
+
+    <!-- Counts article_category rows using the same optional title-substring filter as articleCategoryList. -->
+    <select id="articleCategoryCount" resultType="java.lang.Integer">
+        select count(1) from article_category
+        <where>
+            <if test="title != null and title != ''">
+                title like concat('%', #{title}, '%')
+            </if>
+        </where>
+    </select>
+
+    <!-- Sets category on every article_category row whose title equals the given title exactly. -->
+    <update id="updateArticleCategory">
+        update article_category set category = #{category}
+        where title = #{title};
+    </update>
+
+    <!-- Keeps the old category in original_category. Null-safe compare: plain != skips rows whose category is NULL. -->
+    <update id="updateDatastatScoreCategory">
+        update datastat_score
+        set original_category = category, category = #{category}
+        where title = #{title}
+          and not (category &lt;=&gt; #{category});
+    </update>
+
+    <!-- Sources not yet in article_category. NOT EXISTS, not NOT IN: a single NULL produce_content_id would make NOT IN match nothing. -->
+    <select id="getVideoPoolArticleCategoryDealList"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource">
+        select psvs.*
+        from publish_single_video_source psvs
+        where psvs.bad_status = 0 and psvs.audit_status = 1 and psvs.content_trace_id is not null
+          and not exists (select 1 from article_category ac
+                          where ac.produce_content_id = psvs.content_trace_id)
+    </select>
+
+</mapper>

+ 0 - 43
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -85,17 +85,6 @@
         </foreach>
     </insert>
 
-    <insert id="batchInsertArticleCategory">
-        INSERT INTO article_category
-        (produce_content_id, channel_content_id, crawler_plan_id, title, title_md5, category,
-         kimi_result, status, create_timestamp)
-        VALUES
-        <foreach collection="list" item="item" separator=",">
-            (#{item.produceContentId}, #{item.channelContentId}, #{item.crawlerPlanId}, #{item.title}, #{item.titleMd5},
-            #{item.category}, #{item.kimiResult}, #{item.status}, #{item.createTimestamp})
-        </foreach>
-    </insert>
-
     <select id="getArticlePromotion"
             resultType="com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy">
         select *
@@ -260,38 +249,6 @@
         select distinct oss_path from long_articles_video_audit
     </select>
 
-    <select id="articleCategoryList"
-            resultType="com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO">
-        select produce_content_id, title, category
-        from article_category
-        <where>
-            <if test="title != null and title != ''">
-                and title like concat('%', #{title}, '%')
-            </if>
-        </where>
-        limit #{offset}, #{pageSize}
-    </select>
-
-    <select id="articleCategoryCount" resultType="java.lang.Integer">
-        select count(1)
-        from article_category
-        <where>
-            <if test="title!= null and title!= ''">
-                and title like concat('%', #{title}, '%')
-            </if>
-        </where>
-    </select>
-
-    <update id="updateArticleCategory">
-        update article_category set category = #{category} where title = #{title};
-    </update>
-
-    <update id="updateDatastatScoreCategory">
-        update datastat_score
-        set original_category = category, category = #{category}
-        where title = #{title} and category != #{category};
-    </update>
-
     <update id="updateVideoPoolContentBad">
         update publish_single_video_source set bad_status = 1 where content_trace_id = #{contentTraceId};
     </update>

+ 3 - 3
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/XxlJobTest.java

@@ -3,7 +3,7 @@ package com.tzld.longarticle.recommend.server;
 import com.alibaba.fastjson.JSONObject;
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
-import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.tzld.longarticle.recommend.server.mapper.longArticle.ArticleCategoryMapper;
 import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
 import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
 import com.tzld.longarticle.recommend.server.model.dto.PublishAccountTypeDTO;
@@ -30,7 +30,7 @@ public class XxlJobTest {
     @Resource
     private AigcBaseMapper aigcBaseMapper;
     @Resource
-    private LongArticleBaseMapper longArticleBaseMapper;
+    private ArticleCategoryMapper articleCategoryMapper;
     @Resource
     private ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
     @Resource
@@ -95,7 +95,7 @@ public class XxlJobTest {
                 saveList.add(item);
             }
         }
-        longArticleBaseMapper.batchInsertArticleCategory(saveList);
+        articleCategoryMapper.batchInsertArticleCategory(saveList);
 
     }