|
@@ -1,28 +1,77 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
|
|
|
-import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
+import com.google.common.collect.Lists;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.ArticleCategoryMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.kimi.KimiResult;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCrawlerPlan;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.PublishSingleVideoSource;
|
|
|
import com.tzld.longarticle.recommend.server.model.param.ArticleCategoryListParam;
|
|
|
import com.tzld.longarticle.recommend.server.model.param.ArticleCategoryUpdateParam;
|
|
|
import com.tzld.longarticle.recommend.server.model.vo.ArticleCategoryListVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.ProduceContentCrawlerVO;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.CrawlerContentByPlanService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.KimiApiService;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCrawlerPlanRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
+import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
|
import com.tzld.longarticle.recommend.server.util.page.Page;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.beans.factory.annotation.Value;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
|
|
|
-import java.util.List;
|
|
|
+import java.util.*;
|
|
|
+import java.util.function.Function;
|
|
|
+import java.util.stream.Collectors;
|
|
|
|
|
|
@Service
|
|
|
@Slf4j
|
|
|
public class ArticleCategoryService {
|
|
|
|
|
|
@Autowired
|
|
|
- private LongArticleBaseMapper longArticleBaseMapper;
|
|
|
+ private ArticleCategoryMapper articleCategoryMapper;
|
|
|
+ @Autowired
|
|
|
+ ArticleCategoryRepository articleCategoryRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleCrawlerPlanRepository articleCrawlerPlanRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
|
|
|
+ @Autowired
|
|
|
+ CrawlerContentByPlanService crawlerContentByPlanService;
|
|
|
+ @Autowired
|
|
|
+ AigcBaseMapper aigcBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ KimiApiService kimiApiService;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${cold.pool.produce.planId:[\"20240802021606053813696\", \"20240802080355355308981\",\n" +
|
|
|
+ "\"20240805154433785506170\", \"20240805154359027876170\", \"20241024100016206421084\", " +
|
|
|
+ "\"20241030070010871546586\"]}")
|
|
|
+ private static List<String> producePlanIds;
|
|
|
+ @Value("${kimiCategoryPrompt:}")
|
|
|
+ private String kimiCategoryPrompt;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
|
|
|
+ private Map<String, Map<String, Map<String, String>>> produceConfig;
|
|
|
|
|
|
|
|
|
public Page<ArticleCategoryListVO> articleCategoryList(ArticleCategoryListParam param) {
|
|
|
int offset = (param.getPageNum() - 1) * param.getPageSize();
|
|
|
- int count = longArticleBaseMapper.articleCategoryCount(param.getTitle());
|
|
|
- List<ArticleCategoryListVO> list = longArticleBaseMapper.articleCategoryList(param.getTitle(), offset, param.getPageSize());
|
|
|
+ int count = articleCategoryMapper.articleCategoryCount(param.getTitle());
|
|
|
+ List<ArticleCategoryListVO> list = articleCategoryMapper.articleCategoryList(param.getTitle(), offset, param.getPageSize());
|
|
|
Page<ArticleCategoryListVO> page = new Page<>(param.getPageNum(), param.getPageSize());
|
|
|
page.setTotalSize(count);
|
|
|
page.setObjs(list);
|
|
@@ -30,7 +79,243 @@ public class ArticleCategoryService {
|
|
|
}
|
|
|
|
|
|
public void articleCategoryUpdate(ArticleCategoryUpdateParam param) {
|
|
|
- longArticleBaseMapper.updateArticleCategory(param.getTitle(), param.getCategory());
|
|
|
- longArticleBaseMapper.updateDatastatScoreCategory(param.getTitle(), param.getCategory());
|
|
|
+ articleCategoryMapper.updateArticleCategory(param.getTitle(), param.getCategory());
|
|
|
+ articleCategoryMapper.updateDatastatScoreCategory(param.getTitle(), param.getCategory());
|
|
|
+ }
|
|
|
+
|
|
|
+ public void articleCategory() {
|
|
|
+ // 根据抓取计划 添加品类处理任务
|
|
|
+ addArticleCategoryByCrawlerPlan();
|
|
|
+ // 冷启层 生成计划 添加品类处理任务
|
|
|
+ addColdArticleCategoryByProducePlan();
|
|
|
+ // 晋级 生成计划 添加品类处理任务
|
|
|
+ addPromotionArticleCategoryByProducePlan();
|
|
|
+ // 视频内容池 添加品类处理任务
|
|
|
+ addVideoPoolArticleCategory();
|
|
|
+ // 调用kimi进行内容分类
|
|
|
+ dealArticleCategory();
|
|
|
+ }
|
|
|
+
|
|
|
+ private void dealArticleCategory() {
|
|
|
+ List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
|
|
|
+ List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
|
|
|
+ for (List<ArticleCategory> partition : partitionList) {
|
|
|
+ List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
|
|
|
+ String prompt = buildKimiPrompt(partitionTitles);
|
|
|
+ KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ JSONObject obj = null;
|
|
|
+ if (kimiResult.isSuccess()) {
|
|
|
+ try {
|
|
|
+ obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (ArticleCategory articleCategory : partition) {
|
|
|
+ articleCategory.setKimiResult(kimiResult.getResponseStr());
|
|
|
+ articleCategory.setUpdateTimestamp(now);
|
|
|
+ if (kimiResult.isSuccess() && Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
|
|
|
+ articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ } else {
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
|
|
|
+ articleCategory.setFailReason(kimiResult.getFailReason());
|
|
|
+ }
|
|
|
+ articleCategoryRepository.save(articleCategory);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addArticleCategoryByCrawlerPlan() {
|
|
|
+ List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
|
|
|
+ for (ArticleCrawlerPlan crawlerPlan : articleCrawlerPlanList) {
|
|
|
+ List<ProduceContentCrawlerVO> list = crawlerContentByPlanService.getCrawlerContentByPlan(crawlerPlan.getCrawlerPlanId(), producePlanIds);
|
|
|
+ List<String> produceContentIds = list.stream().map(ProduceContentCrawlerVO::getProduceContentId).collect(Collectors.toList());
|
|
|
+ List<ArticleCategory> exists = articleCategoryRepository.getByProduceContentIdIn(produceContentIds);
|
|
|
+ List<String> existsIds = exists.stream().map(ArticleCategory::getProduceContentId).collect(Collectors.toList());
|
|
|
+ list = list.stream().filter(o -> !existsIds.contains(o.getProduceContentId())).collect(Collectors.toList());
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ List<ArticleCategory> saveList = new ArrayList<>();
|
|
|
+ for (ProduceContentCrawlerVO vo : list) {
|
|
|
+ ArticleCategory item = new ArticleCategory();
|
|
|
+ item.setCrawlerPlanId(crawlerPlan.getCrawlerPlanId());
|
|
|
+ item.setChannelContentId(vo.getChannelContentId());
|
|
|
+ item.setProduceContentId(vo.getProduceContentId());
|
|
|
+ item.setTitle(vo.getTitle());
|
|
|
+ item.setTitleMd5(Md5Util.encoderByMd5(vo.getTitle()));
|
|
|
+ item.setCreateTimestamp(now);
|
|
|
+ saveList.add(item);
|
|
|
+ }
|
|
|
+ if (CollectionUtils.isNotEmpty(saveList)) {
|
|
|
+ articleCategoryMapper.batchInsertArticleCategory(saveList);
|
|
|
+ }
|
|
|
+ // 抓取计划超过5天设置为已处理
|
|
|
+ String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 8);
|
|
|
+ if (DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd") < now - 86400 * 5) {
|
|
|
+ crawlerPlan.setStatus(StatusEnum.ONE.getCode());
|
|
|
+ crawlerPlan.setUpdateTimestamp(now);
|
|
|
+ articleCrawlerPlanRepository.save(crawlerPlan);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addColdArticleCategoryByProducePlan() {
|
|
|
+ List<ArticleCategory> saveList = addArticleCategoryByProducePlan(producePlanIds);
|
|
|
+ if (CollectionUtils.isNotEmpty(saveList)) {
|
|
|
+ articleCategoryMapper.batchInsertArticleCategory(saveList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addPromotionArticleCategoryByProducePlan() {
|
|
|
+ List<String> articlePromotionProducePlanIds = new ArrayList<>();
|
|
|
+ // 获取晋级生成计划Id
|
|
|
+ for (Map.Entry<String, Map<String, Map<String, String>>> oneEntry : produceConfig.entrySet()) {
|
|
|
+ for (Map.Entry<String, Map<String, String>> twoEntry : oneEntry.getValue().entrySet()) {
|
|
|
+ twoEntry.getValue().forEach((key, value) -> {
|
|
|
+ if (StringUtils.hasText(value) && !producePlanIds.contains(value)) {
|
|
|
+ articlePromotionProducePlanIds.add(value);
|
|
|
+ }
|
|
|
+ });
|
|
|
+ }
|
|
|
+ }
|
|
|
+ List<ArticleCategory> saveList = addArticleCategoryByProducePlan(articlePromotionProducePlanIds);
|
|
|
+ // 已晋级文章 先溯源查找源内容品类,查询不到再用kimi进行分类
|
|
|
+ if (CollectionUtils.isNotEmpty(saveList)) {
|
|
|
+ List<String> channelContentIds = saveList.stream().map(ArticleCategory::getChannelContentId)
|
|
|
+ .collect(Collectors.toList());
|
|
|
+ // 查询晋升rootProduceContentId
|
|
|
+ List<ArticlePoolPromotionSource> sourceList = articlePoolPromotionSourceRepository
|
|
|
+ .getByChannelContentIdInAndStatusAndDeleted(channelContentIds,
|
|
|
+ ArticlePoolPromotionSourceStatusEnum.FINISH.getCode(), 0);
|
|
|
+ Map<String, ArticlePoolPromotionSource> sourceMap = sourceList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticlePoolPromotionSource::getChannelContentId, Function.identity()));
|
|
|
+ // 根据produceContentId查询category
|
|
|
+ List<ArticleCategory> articleCategoryList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ Map<String, ArticleCategory> categoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getProduceContentId, Function.identity()));
|
|
|
+ Map<String, ArticleCategory> coldStartCategoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getChannelContentId, Function.identity(), (a, b) -> a));
|
|
|
+ Map<String, ArticleCategory> titleCategoryMap = articleCategoryList.stream()
|
|
|
+ .collect(Collectors.toMap(ArticleCategory::getTitleMd5, Function.identity(), (a, b) -> a));
|
|
|
+ for (ArticleCategory articleCategory : saveList) {
|
|
|
+ ArticlePoolPromotionSource source = sourceMap.get(articleCategory.getChannelContentId());
|
|
|
+ ArticleCategory category = null;
|
|
|
+ if (Objects.nonNull(source) && Objects.nonNull(source.getRootProduceContentId())) {
|
|
|
+ category = categoryMap.get(source.getRootProduceContentId());
|
|
|
+ }
|
|
|
+ if (Objects.isNull(category)) {
|
|
|
+ category = coldStartCategoryMap.get(articleCategory.getChannelContentId());
|
|
|
+ }
|
|
|
+ if (Objects.isNull(category)) {
|
|
|
+ category = titleCategoryMap.get(articleCategory.getTitleMd5());
|
|
|
+ }
|
|
|
+ if (Objects.nonNull(category) && StringUtils.hasText(category.getCategory())) {
|
|
|
+ articleCategory.setCategory(category.getCategory());
|
|
|
+ articleCategory.setKimiResult(category.getKimiResult());
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ articleCategoryMapper.batchInsertArticleCategory(saveList);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addVideoPoolArticleCategory() {
|
|
|
+ List<ArticleCategory> saveList = new ArrayList<>();
|
|
|
+ // 查找所有待处理视频内容池内容
|
|
|
+ List<PublishSingleVideoSource> dealList = articleCategoryMapper.getVideoPoolArticleCategoryDealList();
|
|
|
+ if (CollectionUtils.isEmpty(dealList)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ for (PublishSingleVideoSource videoSource : dealList) {
|
|
|
+ ArticleCategory item = new ArticleCategory();
|
|
|
+ item.setProduceContentId(videoSource.getContentTraceId());
|
|
|
+ item.setTitle(videoSource.getArticleTitle());
|
|
|
+ item.setTitleMd5(Md5Util.encoderByMd5(videoSource.getArticleTitle()));
|
|
|
+ item.setCreateTimestamp(now);
|
|
|
+ saveList.add(item);
|
|
|
+ }
|
|
|
+ articleCategoryMapper.batchInsertArticleCategory(saveList);
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 根据生成计划获取需要进行分类内容
|
|
|
+ */
|
|
|
+ private List<ArticleCategory> addArticleCategoryByProducePlan(List<String> producePlanIds) {
|
|
|
+ List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
|
|
|
+ List<String> channelContentIds = produceContentList.stream().map(ProducePlanExeRecord::getChannelContentId).distinct().collect(Collectors.toList());
|
|
|
+ List<ArticleCategory> articleCategoryList = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds);
|
|
|
+ List<String> articleCategoryIds = articleCategoryList.stream().map(ArticleCategory::getChannelContentId).collect(Collectors.toList());
|
|
|
+ List<ProduceContentCrawlerVO> list = produceContentList.stream().filter(o -> !articleCategoryIds.contains(o.getChannelContentId())).map(o -> {
|
|
|
+ ProduceContentCrawlerVO item = new ProduceContentCrawlerVO();
|
|
|
+ item.setChannelContentId(o.getChannelContentId());
|
|
|
+ item.setProduceContentId(o.getPlanExeId());
|
|
|
+ return item;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ channelContentIds = channelContentIds.stream().filter(o -> !articleCategoryIds.contains(o)).collect(Collectors.toList());
|
|
|
+ if (CollectionUtils.isEmpty(channelContentIds)) {
|
|
|
+ return Collections.emptyList();
|
|
|
+ }
|
|
|
+ List<CrawlerContent> crawlerContentList = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(channelContentIds);
|
|
|
+ Map<String, CrawlerContent> map = crawlerContentList.stream().collect(Collectors.toMap(CrawlerContent::getChannelContentId, Function.identity()));
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ List<ArticleCategory> saveList = new ArrayList<>();
|
|
|
+ for (ProduceContentCrawlerVO vo : list) {
|
|
|
+ ArticleCategory item = new ArticleCategory();
|
|
|
+ item.setChannelContentId(vo.getChannelContentId());
|
|
|
+ item.setProduceContentId(vo.getProduceContentId());
|
|
|
+ CrawlerContent crawlerContent = map.get(vo.getChannelContentId());
|
|
|
+ if (Objects.nonNull(crawlerContent)) {
|
|
|
+ String title = crawlerContent.getTitle();
|
|
|
+ item.setCrawlerPlanId(crawlerContent.getCrawlerPlanId());
|
|
|
+ item.setTitle(title);
|
|
|
+ item.setTitleMd5(Md5Util.encoderByMd5(title));
|
|
|
+ item.setCreateTimestamp(now);
|
|
|
+ saveList.add(item);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return saveList;
|
|
|
+ }
|
|
|
+
|
|
|
+ private String buildKimiPrompt(List<String> titleList) {
|
|
|
+ StringBuilder prompt = new StringBuilder(kimiCategoryPrompt);
|
|
|
+ prompt.append("\n");
|
|
|
+ for (String title : titleList) {
|
|
|
+ prompt.append(title).append("\n");
|
|
|
+ }
|
|
|
+ return prompt.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ public void articleCategoryJobRetry() {
|
|
|
+ List<ArticleCategory> dealList = articleCategoryRepository.getByStatusAndRetryTimesLessThan(ArticleCategoryStatusEnum.FAIL.getCode(), 3);
|
|
|
+ for (ArticleCategory articleCategory : dealList) {
|
|
|
+ List<String> partitionTitles = Collections.singletonList(articleCategory.getTitle());
|
|
|
+ String prompt = buildKimiPrompt(partitionTitles);
|
|
|
+ KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ JSONObject obj = null;
|
|
|
+ if (kimiResult.isSuccess()) {
|
|
|
+ try {
|
|
|
+ obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ articleCategory.setKimiResult(kimiResult.getResponseStr());
|
|
|
+ articleCategory.setUpdateTimestamp(now);
|
|
|
+ articleCategory.setRetryTimes(articleCategory.getRetryTimes() + 1);
|
|
|
+ if (kimiResult.isSuccess() && Objects.nonNull(obj)) {
|
|
|
+ List<String> keys = new ArrayList<>(obj.keySet());
|
|
|
+ String category = obj.getString(keys.get(0));
|
|
|
+ articleCategory.setCategory(category);
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ articleCategory.setFailReason(null);
|
|
|
+ } else {
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
|
|
|
+ articleCategory.setFailReason(kimiResult.getFailReason());
|
|
|
+ }
|
|
|
+ articleCategoryRepository.save(articleCategory);
|
|
|
+ }
|
|
|
}
|
|
|
}
|