|
@@ -2,6 +2,7 @@ package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
|
|
|
import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.tzld.longarticle.recommend.server.common.enums.aigc.CrawlerModeEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
|
|
|
import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
|
|
|
import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
@@ -23,6 +24,7 @@ import com.tzld.longarticle.recommend.server.repository.longArticle.DatastatSort
|
|
|
import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
|
import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
+import com.tzld.longarticle.recommend.server.util.feishu.FeishuMessageSender;
|
|
|
import lombok.extern.slf4j.Slf4j;
|
|
|
import org.apache.commons.collections4.CollectionUtils;
|
|
|
import org.springframework.beans.factory.annotation.Autowired;
|
|
@@ -116,59 +118,66 @@ public class ArticlePromotionService {
|
|
|
aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag, CrawlerModeEnum.ContentIDs.getVal());
|
|
|
return;
|
|
|
}
|
|
|
- String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
|
|
|
- List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
|
|
|
- // 获取已访问的标题和URL
|
|
|
- List<String> visitedTitleList = contentList.stream()
|
|
|
- .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
|
|
|
- .distinct().collect(Collectors.toList());
|
|
|
- Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
|
|
|
- .collect(Collectors.toSet());
|
|
|
- // 筛选URL和标题
|
|
|
- List<String> publishContentIds = new ArrayList<>();
|
|
|
- List<String> filterUrlList = new ArrayList<>();
|
|
|
- for (DatastatSortStrategy item : list) {
|
|
|
- String url = item.getLink();
|
|
|
- String urlId = getUrlId(item.getLink());
|
|
|
- String title = item.getTitle();
|
|
|
- String wxSn = item.getWxSn();
|
|
|
- if (visitedUrlIdList.contains(urlId)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
|
|
|
- continue;
|
|
|
- }
|
|
|
- filterUrlList.add(url);
|
|
|
- // 调用爬虫 detail 接口并保存数据
|
|
|
- WxContentDetailResponse detail = getArticleDetail(url);
|
|
|
- String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
|
|
|
- if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
|
|
|
- saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
|
|
|
- } else {
|
|
|
- String publishContentId = articleService.getPublishContentByWxSn(wxSn);
|
|
|
- if (StringUtils.hasText(publishContentId)) {
|
|
|
- publishContentIds.add(publishContentId);
|
|
|
- saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
|
|
|
+ try {
|
|
|
+ String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
|
|
|
+ List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
|
|
|
+ // 获取已访问的标题和URL
|
|
|
+ List<String> visitedTitleList = contentList.stream()
|
|
|
+ .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
|
|
|
+ .distinct().collect(Collectors.toList());
|
|
|
+ Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
|
|
|
+ .collect(Collectors.toSet());
|
|
|
+ // 筛选URL和标题
|
|
|
+ List<String> publishContentIds = new ArrayList<>();
|
|
|
+ List<String> filterUrlList = new ArrayList<>();
|
|
|
+ for (DatastatSortStrategy item : list) {
|
|
|
+ String url = item.getLink();
|
|
|
+ String urlId = getUrlId(item.getLink());
|
|
|
+ String title = item.getTitle();
|
|
|
+ String wxSn = item.getWxSn();
|
|
|
+ if (visitedUrlIdList.contains(urlId)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ filterUrlList.add(url);
|
|
|
+ // 调用爬虫 detail 接口并保存数据
|
|
|
+ WxContentDetailResponse detail = getArticleDetail(url);
|
|
|
+ String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
|
|
|
+ if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
|
|
|
+ saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
|
|
|
+ } else {
|
|
|
+ String publishContentId = articleService.getPublishContentByWxSn(wxSn);
|
|
|
+ if (StringUtils.hasText(publishContentId)) {
|
|
|
+ publishContentIds.add(publishContentId);
|
|
|
+ saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
|
|
|
+ }
|
|
|
}
|
|
|
}
|
|
|
- }
|
|
|
- if (filterUrlList.isEmpty()) {
|
|
|
- log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
|
|
|
- return;
|
|
|
- }
|
|
|
- int urlLen = filterUrlList.size();
|
|
|
- String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
|
|
|
- log.info("url_len: " + list.size() + ", " + urlLen);
|
|
|
- IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
|
|
|
- if (StringUtils.hasText(produceId)) {
|
|
|
- articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
- }
|
|
|
- log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
|
|
|
- if (CollectionUtils.isNotEmpty(publishContentIds)) {
|
|
|
- planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
|
|
|
+ if (filterUrlList.isEmpty()) {
|
|
|
+ log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ int urlLen = filterUrlList.size();
|
|
|
+ String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
|
|
|
+ log.info("url_len: " + list.size() + ", " + urlLen);
|
|
|
+ IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
|
|
|
if (StringUtils.hasText(produceId)) {
|
|
|
articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
}
|
|
|
+ log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
|
|
|
+ if (CollectionUtils.isNotEmpty(publishContentIds)) {
|
|
|
+ planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
|
|
|
+ if (StringUtils.hasText(produceId)) {
|
|
|
+ articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("articlePromotion error: ", e);
|
|
|
+ FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
|
|
|
+ "文章晋升ERROR:\n" +
|
|
|
+ "articlePromotion error: " + e.getMessage());
|
|
|
}
|
|
|
}
|
|
|
|
|
@@ -211,7 +220,7 @@ public class ArticlePromotionService {
|
|
|
return result;
|
|
|
}
|
|
|
int pageSize = 500;
|
|
|
- List<Integer> produceStatus = Arrays.asList(1,2,3,4,5,6);
|
|
|
+ List<Integer> produceStatus = Arrays.asList(1, 2, 3, 4, 5, 6);
|
|
|
CommonListDataVO<ProduceContentListItemVO> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
|
|
|
int totalCnt = rawData.getTotalCount();
|
|
|
int pageNumMax = totalCnt / pageSize;
|
|
@@ -302,24 +311,32 @@ public class ArticlePromotionService {
|
|
|
}
|
|
|
|
|
|
private void articleAddDependPlan(String produceId, String planId, String planName) {
|
|
|
- // 获取生产计划的详细信息
|
|
|
- ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
|
|
|
- if (detail == null) {
|
|
|
- log.info("Failed to fetch produce plan detail.");
|
|
|
- return;
|
|
|
- }
|
|
|
- // 获取依赖计划 ID 列表
|
|
|
- List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
|
|
|
- List<String> dependPlanIds = new ArrayList<>();
|
|
|
- for (ProducePlanInputSourceParam inputSource : inputSources) {
|
|
|
- dependPlanIds.add(inputSource.getInputSourceValue());
|
|
|
- }
|
|
|
- // 如果计划 ID 已存在,直接返回
|
|
|
- if (dependPlanIds.contains(planId)) {
|
|
|
- log.info("depend_plan_id exist: {}", planId);
|
|
|
- return;
|
|
|
+ try {
|
|
|
+ // 获取生产计划的详细信息
|
|
|
+ ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
|
|
|
+ if (detail == null) {
|
|
|
+ FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
|
|
|
+ "文章晋升ERROR:\n" +
|
|
|
+ "articleAddDependPlan getProducePlan error: planId:" + produceId);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ // 获取依赖计划 ID 列表
|
|
|
+ List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
|
|
|
+ List<String> dependPlanIds = new ArrayList<>();
|
|
|
+ for (ProducePlanInputSourceParam inputSource : inputSources) {
|
|
|
+ dependPlanIds.add(inputSource.getInputSourceValue());
|
|
|
+ }
|
|
|
+ // 如果计划 ID 已存在,直接返回
|
|
|
+ if (dependPlanIds.contains(planId)) {
|
|
|
+ log.info("depend_plan_id exist: {}", planId);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ aigcProducePlanSaveService.save(planName, planId, detail);
|
|
|
+ } catch (Exception e) {
|
|
|
+ FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
|
|
|
+ "文章晋升ERROR:\n" +
|
|
|
+ "articleAddDependPlan error: " + e.getMessage());
|
|
|
}
|
|
|
- aigcProducePlanSaveService.save(planName, planId, detail);
|
|
|
}
|
|
|
|
|
|
}
|