|
@@ -1,28 +1,41 @@
|
|
|
package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
+import com.google.common.collect.Lists;
|
|
|
import com.google.common.util.concurrent.ThreadFactoryBuilder;
|
|
|
import com.tzld.longarticle.recommend.server.common.CommonThreadPoolExecutor;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.StatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.aigc.PublishContentTypeEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticleCategoryStatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.recommend.ArticlePoolPromotionSourceStatusEnum;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
|
|
|
+import com.tzld.longarticle.recommend.server.model.dto.kimi.KimiResult;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContent;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishContentOutput;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCrawlerPlan;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
import com.tzld.longarticle.recommend.server.model.param.ArticleFindSourceParam;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.ProduceContentCrawlerVO;
|
|
|
import com.tzld.longarticle.recommend.server.model.vo.RootPublishContentVO;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.CrawlerContentByPlanService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.KimiApiService;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.aigc.PublishContentOutputRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCrawlerPlanRepository;
|
|
|
import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
import com.tzld.longarticle.recommend.server.service.recommend.config.AccountIndexAvgViewCountService;
|
|
|
import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
+import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
|
import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.collections4.CollectionUtils;
|
|
@@ -31,10 +44,7 @@ import org.springframework.beans.factory.annotation.Autowired;
|
|
|
import org.springframework.stereotype.Service;
|
|
|
import org.springframework.util.StringUtils;
|
|
|
|
|
|
-import java.util.ArrayList;
|
|
|
-import java.util.List;
|
|
|
-import java.util.Map;
|
|
|
-import java.util.Objects;
|
|
|
+import java.util.*;
|
|
|
import java.util.concurrent.*;
|
|
|
import java.util.stream.Collectors;
|
|
|
|
|
@@ -63,6 +73,19 @@ public class ArticleService {
|
|
|
LongArticleBaseMapper longArticleBaseMapper;
|
|
|
@Autowired
|
|
|
ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleCrawlerPlanRepository articleCrawlerPlanRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleCategoryRepository articleCategoryRepository;
|
|
|
+ @Autowired
|
|
|
+ CrawlerContentByPlanService crawlerContentByPlanService;
|
|
|
+ @Autowired
|
|
|
+ KimiApiService kimiApiService;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${cold.pool.produce.planId:[\"20240802021606053813696\", \"20240802080355355308981\",\n" +
|
|
|
+ "\"20240805154433785506170\", \"20240805154359027876170\", \"20241024100016206421084\", " +
|
|
|
+ "\"20241030070010871546586\"]}")
|
|
|
+ private static List<String> producePlanIds;
|
|
|
|
|
|
private final static ExecutorService pool = new CommonThreadPoolExecutor(
|
|
|
32,
|
|
@@ -262,7 +285,7 @@ public class ArticleService {
|
|
|
// 溯源
|
|
|
Article article = articleRepository.getByWxSn(task.getWxSn());
|
|
|
if (Objects.isNull(article)) {
|
|
|
- task.setDeleted(StatusEnum.SUCCESS.getCode());
|
|
|
+ task.setDeleted(StatusEnum.ONE.getCode());
|
|
|
articlePoolPromotionSourceRepository.save(task);
|
|
|
continue;
|
|
|
}
|
|
@@ -274,7 +297,7 @@ public class ArticleService {
|
|
|
List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), publishTimestamp, 100);
|
|
|
PublishContent publishContent = findPublishContent(publishContentList, task.getTitle(), publishTimestamp);
|
|
|
if (Objects.isNull(publishContent)) {
|
|
|
- task.setDeleted(StatusEnum.SUCCESS.getCode());
|
|
|
+ task.setDeleted(StatusEnum.ONE.getCode());
|
|
|
articlePoolPromotionSourceRepository.save(task);
|
|
|
continue;
|
|
|
}
|
|
@@ -309,9 +332,145 @@ public class ArticleService {
|
|
|
}
|
|
|
longArticleBaseMapper.updateRootProduceContentLevel(task.getRootProduceContentId(), task.getLevel());
|
|
|
} else {
|
|
|
- task.setDeleted(StatusEnum.SUCCESS.getCode());
|
|
|
+ task.setDeleted(StatusEnum.ONE.getCode());
|
|
|
articlePoolPromotionSourceRepository.save(task);
|
|
|
}
|
|
|
}
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
+ public void articleCategory() {
|
|
|
+ List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
|
|
|
+ for (ArticleCrawlerPlan crawlerPlan : articleCrawlerPlanList) {
|
|
|
+ List<ProduceContentCrawlerVO> list = crawlerContentByPlanService.getCrawlerContentByPlan(crawlerPlan.getCrawlerPlanId(), producePlanIds);
|
|
|
+ List<String> produceContentIds = list.stream().map(ProduceContentCrawlerVO::getProduceContentId).collect(Collectors.toList());
|
|
|
+ List<ArticleCategory> exists = articleCategoryRepository.getByProduceContentIdIn(produceContentIds);
|
|
|
+ List<String> existsIds = exists.stream().map(ArticleCategory::getProduceContentId).collect(Collectors.toList());
|
|
|
+ list = list.stream().filter(o -> !existsIds.contains(o.getProduceContentId())).collect(Collectors.toList());
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ List<ArticleCategory> saveList = new ArrayList<>();
|
|
|
+ for (ProduceContentCrawlerVO vo : list) {
|
|
|
+ ArticleCategory item = new ArticleCategory();
|
|
|
+ item.setCrawlerPlanId(crawlerPlan.getCrawlerPlanId());
|
|
|
+ item.setChannelContentId(vo.getChannelContentId());
|
|
|
+ item.setProduceContentId(vo.getProduceContentId());
|
|
|
+ item.setTitle(vo.getTitle());
|
|
|
+ item.setTitleMd5(Md5Util.encoderByMd5(vo.getTitle()));
|
|
|
+ item.setCreateTimestamp(now);
|
|
|
+ saveList.add(item);
|
|
|
+ }
|
|
|
+ if (CollectionUtils.isNotEmpty(saveList)) {
|
|
|
+ longArticleBaseMapper.batchInsertArticleCategory(saveList);
|
|
|
+ }
|
|
|
+ // 抓取计划超过5天设置为已处理
|
|
|
+ String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 8);
|
|
|
+ if (DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd") < now - 86400 * 5) {
|
|
|
+ crawlerPlan.setStatus(StatusEnum.ONE.getCode());
|
|
|
+ crawlerPlan.setUpdateTimestamp(now);
|
|
|
+ articleCrawlerPlanRepository.save(crawlerPlan);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
|
|
|
+ List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
|
|
|
+ for (List<ArticleCategory> partition : partitionList) {
|
|
|
+ List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
|
|
|
+ String prompt = buildKimiPrompt(partitionTitles);
|
|
|
+ KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ JSONObject obj = null;
|
|
|
+ if (kimiResult.isSuccess()) {
|
|
|
+ try {
|
|
|
+ obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (ArticleCategory articleCategory : partition) {
|
|
|
+ articleCategory.setKimiResult(kimiResult.getResponseStr());
|
|
|
+ articleCategory.setUpdateTimestamp(now);
|
|
|
+ if (kimiResult.isSuccess() && Objects.nonNull(obj) && obj.containsKey(articleCategory.getTitle())) {
|
|
|
+ articleCategory.setCategory(obj.getString(articleCategory.getTitle()));
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ } else {
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
|
|
|
+ articleCategory.setFailReason(kimiResult.getFailReason());
|
|
|
+ }
|
|
|
+ articleCategoryRepository.save(articleCategory);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+ private String buildKimiPrompt(List<String> titleList) {
|
|
|
+ StringBuilder prompt = new StringBuilder(
|
|
|
+ "请帮我完成以下任务:输入为文章的标题,根据标题判断其内容所属的类目,输出为文章标题及其对应的类目。\n" +
|
|
|
+ "类目需从以下范围内选择:\n" +
|
|
|
+ "奇闻趣事\n" +
|
|
|
+ "历史人物\n" +
|
|
|
+ "家长里短\n" +
|
|
|
+ "温情故事\n" +
|
|
|
+ "健康养生\n" +
|
|
|
+ "生活知识\n" +
|
|
|
+ "名人八卦\n" +
|
|
|
+ "政治新闻\n" +
|
|
|
+ "军事新闻\n" +
|
|
|
+ "为了更好地完成任务,可参考下列对文章标题的分类:\n" +
|
|
|
+ "{" +
|
|
|
+ "\"大舅病了,我取了三万元送过去,病房门口听到舅妈的话我改了主意\": \"家长里短\",\n" +
|
|
|
+ "\"能活到90岁的老人,基本上在70岁的时候,就不再做这些事了!\": \"健康养生\",\n" +
|
|
|
+ "\"去医院看望病人时,切忌带这4样东西,再亲近也不行,这是做人的根本\": \"生活知识\",\n" +
|
|
|
+ "\"上海一女子去饭店吃生煎包,戳了个洞想凉一凉,往里一看,瞬间惊呆了\": \"奇闻趣事\",\n" +
|
|
|
+ "\"卫生间放一把食盐,一年能省下好几百,涨新知识\": \"生活知识\",\n" +
|
|
|
+ "\"中国有一古寺,庙不大,却有武警24小时站岗,到底有何“过人”之处\": \"奇闻趣事\",\n" +
|
|
|
+ "\"1974年,苏联外长故意拿邓小平身高“取笑”,邓小平一句话轻松反击\": \"历史人物\",\n" +
|
|
|
+ "\"中国最美的女将军:上世纪曾家喻户晓,如今仍然健在\": \"历史人物\",\n" +
|
|
|
+ "\"北大才女蒙曼48岁仍未婚,被问最想嫁给谁,一个名字让全场笑喷\": \"名人八卦\",\n" +
|
|
|
+ "\"广东一老人去世,家人把老人的旧床垫扔了,环卫工人看到后,竟发现里面藏了15万元现金!家人傻眼了\": \"奇闻趣事\",\n" +
|
|
|
+ "}" +
|
|
|
+ "最后输出结果请用JSON格式输出,key为title,value为类目,仅输出JSON,不要markdown格式,不要任何其他内容," +
|
|
|
+ "并且内容可以被 fastJSON 的JSONObject.parseObject转换为JSON对象\n" +
|
|
|
+ "当标题的开头或结尾为以下字符时“”“,则在标题的开头或结尾增加\" " +
|
|
|
+ "输出结果格式如下:\n" +
|
|
|
+ "{" +
|
|
|
+ "\"浙江老人用“假钱”吃霸王餐9年,离世后,老板却崩溃大哭:“每天都在等他!”\": \"奇闻趣事\"," +
|
|
|
+ "\"“最美婴儿”迅速走红,像在娘胎里整过容,网友:看到第一眼就想抱回家\": \"奇闻趣事\"" +
|
|
|
+ "}" +
|
|
|
+ "以下是需要分析的文章标题列表,每一行是一个标题:\n");
|
|
|
+ for (String title : titleList) {
|
|
|
+ prompt.append(title).append("\n");
|
|
|
+ }
|
|
|
+ return prompt.toString();
|
|
|
+ }
|
|
|
+
|
|
|
+ public void articleCategoryJobRetry() {
|
|
|
+ List<ArticleCategory> dealList = articleCategoryRepository.getByStatusAndRetryTimesLessThan(ArticleCategoryStatusEnum.FAIL.getCode(), 3);
|
|
|
+ for (ArticleCategory articleCategory : dealList) {
|
|
|
+ List<String> partitionTitles = Collections.singletonList(articleCategory.getTitle());
|
|
|
+ String prompt = buildKimiPrompt(partitionTitles);
|
|
|
+ KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
|
|
|
+ long now = System.currentTimeMillis();
|
|
|
+ JSONObject obj = null;
|
|
|
+ if (kimiResult.isSuccess()) {
|
|
|
+ try {
|
|
|
+ obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ articleCategory.setKimiResult(kimiResult.getResponseStr());
|
|
|
+ articleCategory.setUpdateTimestamp(now);
|
|
|
+ articleCategory.setRetryTimes(articleCategory.getRetryTimes() + 1);
|
|
|
+ if (kimiResult.isSuccess() && Objects.nonNull(obj)) {
|
|
|
+ List<String> keys = new ArrayList<>(obj.keySet());
|
|
|
+ String category = obj.getString(keys.get(0));
|
|
|
+ articleCategory.setCategory(category);
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.SUCCESS.getCode());
|
|
|
+ articleCategory.setFailReason(null);
|
|
|
+ } else {
|
|
|
+ articleCategory.setStatus(ArticleCategoryStatusEnum.FAIL.getCode());
|
|
|
+ articleCategory.setFailReason(kimiResult.getFailReason());
|
|
|
+ }
|
|
|
+ articleCategoryRepository.save(articleCategory);
|
|
|
+ }
|
|
|
+ }
|
|
|
}
|