|
@@ -0,0 +1,325 @@
|
|
|
+package com.tzld.longarticle.recommend.server.service.recommend;
|
|
|
+
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
+import com.tzld.longarticle.recommend.server.common.enums.aigc.CrawlerModeEnum;
|
|
|
+import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
|
|
|
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.IdNameVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.WxContentDetailResponse;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.CommonListDataVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProduceContentListItemVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanDetailVO;
|
|
|
+import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanInputSourceParam;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.WxFetchRemoteService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCCrawlerPlanSaveService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProduceContentListService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProducePlanDetailService;
|
|
|
+import com.tzld.longarticle.recommend.server.remote.aigc.AIGCProducePlanSaveService;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.aigc.PublishAccountRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.repository.longArticle.DatastatSortStrategyRepository;
|
|
|
+import com.tzld.longarticle.recommend.server.util.DateUtils;
|
|
|
+import com.tzld.longarticle.recommend.server.util.Md5Util;
|
|
|
+import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
|
|
|
+import lombok.extern.slf4j.Slf4j;
|
|
|
+import org.apache.commons.collections4.CollectionUtils;
|
|
|
+import org.springframework.beans.factory.annotation.Autowired;
|
|
|
+import org.springframework.stereotype.Service;
|
|
|
+import org.springframework.util.StringUtils;
|
|
|
+
|
|
|
+import java.net.URLDecoder;
|
|
|
+import java.util.*;
|
|
|
+import java.util.stream.Collectors;
|
|
|
+
|
|
|
+@Service
|
|
|
+@Slf4j
|
|
|
+public class ArticlePromotionService {
|
|
|
+
|
|
|
+ @Autowired
|
|
|
+ LongArticleBaseMapper longArticleBaseMapper;
|
|
|
+ @Autowired
|
|
|
+ DatastatSortStrategyRepository datastatSortStrategyRepository;
|
|
|
+ @Autowired
|
|
|
+ AIGCCrawlerPlanSaveService aigcCrawlerPlanSaveService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProducePlanDetailService aigcProducePlanDetailService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProducePlanSaveService aigcProducePlanSaveService;
|
|
|
+ @Autowired
|
|
|
+ AIGCProduceContentListService aigcProduceContentListService;
|
|
|
+ @Autowired
|
|
|
+ ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
|
|
|
+ @Autowired
|
|
|
+ WxFetchRemoteService wxFetchRemoteService;
|
|
|
+ @Autowired
|
|
|
+ PublishAccountRepository publishAccountRepository;
|
|
|
+ @Autowired
|
|
|
+ ArticleService articleService;
|
|
|
+
|
|
|
+ @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
|
|
|
+ private Map<String, Map<String, Map<String, String>>> produceConfig;
|
|
|
+
|
|
|
+ private final List<String> contentPoolType = Arrays.asList("autoArticlePoolLevel1", "autoArticlePoolLevel3", "autoArticlePoolLevel4");
|
|
|
+
|
|
|
+ public void articlePromotion(String pos, String way, String accountNickName, String tag,
|
|
|
+ Integer viewCountFilter, Double viewCountRateFilter, List<Integer> positionFilter) {
|
|
|
+ String today = DateUtils.getCurrentDateStr("yyyyMMdd");
|
|
|
+ String dateStrFilter = DateUtils.getBeforeDaysDateStr("yyyyMMdd", 10);
|
|
|
+ // 获取内部表现
|
|
|
+ List<DatastatSortStrategy> list = longArticleBaseMapper.getArticlePromotion(viewCountFilter, viewCountRateFilter,
|
|
|
+ 10000, dateStrFilter, positionFilter);
|
|
|
+ list = filterEarlyContent(list);
|
|
|
+ log.info("优质{}文章数量: {}", accountNickName, list.size());
|
|
|
+ List<DatastatSortStrategy> distinct = filterSameTitle(list);
|
|
|
+ distinct.sort(Comparator.comparing(DatastatSortStrategy::getDateStr, Comparator.reverseOrder()));
|
|
|
+ log.info("优质{}文章数量(去重后): {}", accountNickName, distinct.size());
|
|
|
+ addUrlListToAccount(accountNickName, distinct, pos, way, today, tag);
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<DatastatSortStrategy> filterEarlyContent(List<DatastatSortStrategy> list) {
|
|
|
+ List<String> ghIds = list.stream().map(DatastatSortStrategy::getGhId).distinct().collect(Collectors.toList());
|
|
|
+ List<PublishAccount> publishAccountList = publishAccountRepository.getAllByGhIdIn(ghIds);
|
|
|
+ Map<String, Long> publishAccountCreateTimeMap = publishAccountList.stream().collect(Collectors.toMap(
|
|
|
+ PublishAccount::getGhId, PublishAccount::getCreateTimestamp));
|
|
|
+ list = list.stream().filter(o -> {
|
|
|
+ long publishTime = DateUtils.dateStrToTimestamp(o.getDateStr(), "yyyyMMdd");
|
|
|
+ Long accountCreateTime = publishAccountCreateTimeMap.get(o.getGhId());
|
|
|
+ return publishTime * 1000 > accountCreateTime;
|
|
|
+ }).collect(Collectors.toList());
|
|
|
+ return list;
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<DatastatSortStrategy> filterSameTitle(List<DatastatSortStrategy> list) {
|
|
|
+ List<DatastatSortStrategy> result = new ArrayList<>();
|
|
|
+ List<String> titles = new ArrayList<>();
|
|
|
+ for (DatastatSortStrategy datastatSortStrategy : list) {
|
|
|
+ String title = datastatSortStrategy.getTitle();
|
|
|
+ if (titles.contains(title)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (!TitleSimilarCheckUtil.isDuplicateContent(datastatSortStrategy.getTitle(), titles, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
|
|
|
+ result.add(datastatSortStrategy);
|
|
|
+ titles.add(title);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private void addUrlListToAccount(String accountNickName, List<DatastatSortStrategy> list, String pos, String way,
|
|
|
+ String today, String tag) {
|
|
|
+ List<String> urlList = list.stream().map(DatastatSortStrategy::getLink).collect(Collectors.toList());
|
|
|
+ if (!produceConfig.containsKey(accountNickName)) {
|
|
|
+ log.info("account_nickname not in produceConfig: " + accountNickName);
|
|
|
+ String planName = String.format("%d_%s_%s_%s【%s】_%s", list.size(), today, accountNickName, pos, way, today);
|
|
|
+ aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag, CrawlerModeEnum.ContentIDs.getVal());
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
|
|
|
+ List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
|
|
|
+ // 获取已访问的标题和URL
|
|
|
+ List<String> visitedTitleList = contentList.stream()
|
|
|
+ .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
|
|
|
+ .distinct().collect(Collectors.toList());
|
|
|
+ Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
|
|
|
+ .collect(Collectors.toSet());
|
|
|
+ // 筛选URL和标题
|
|
|
+ List<String> publishContentIds = new ArrayList<>();
|
|
|
+ List<String> filterUrlList = new ArrayList<>();
|
|
|
+ for (DatastatSortStrategy item : list) {
|
|
|
+ String url = item.getLink();
|
|
|
+ String urlId = getUrlId(item.getLink());
|
|
|
+ String title = item.getTitle();
|
|
|
+ String wxSn = item.getWxSn();
|
|
|
+ if (visitedUrlIdList.contains(urlId)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ filterUrlList.add(url);
|
|
|
+ // 调用爬虫 detail 接口并保存数据
|
|
|
+ WxContentDetailResponse detail = getArticleDetail(url);
|
|
|
+ String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
|
|
|
+ if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
|
|
|
+ saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
|
|
|
+ } else {
|
|
|
+ String publishContentId = articleService.getPublishContentByWxSn(wxSn);
|
|
|
+ if (StringUtils.hasText(publishContentId)) {
|
|
|
+ publishContentIds.add(publishContentId);
|
|
|
+ saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ if (filterUrlList.isEmpty()) {
|
|
|
+ log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ int urlLen = filterUrlList.size();
|
|
|
+ String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
|
|
|
+ log.info("url_len: " + list.size() + ", " + urlLen);
|
|
|
+ IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
|
|
|
+ if (StringUtils.hasText(produceId)) {
|
|
|
+ articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
+ }
|
|
|
+ log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
|
|
|
+ if (CollectionUtils.isNotEmpty(publishContentIds)) {
|
|
|
+ planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
|
|
|
+ if (StringUtils.hasText(produceId)) {
|
|
|
+ articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private List<ProduceContentListItemVO> getProduceContentList(String accountNickName, String pos, String way) {
|
|
|
+ List<String> planIdList = getProducePlanIdList(accountNickName, pos, way);
|
|
|
+ CommonListDataVO<ProduceContentListItemVO> contentData = getProduceContentListByPlanIdList(planIdList);
|
|
|
+ return contentData.getData();
|
|
|
+ }
|
|
|
+
|
|
|
+ public List<String> getProducePlanIdList(String accountNickname, String pos, String way) {
|
|
|
+ List<String> res = new ArrayList<>();
|
|
|
+ if (!produceConfig.containsKey(accountNickname)) {
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+ Map<String, Map<String, String>> accountConfig = produceConfig.get(accountNickname);
|
|
|
+ for (String posKey : accountConfig.keySet()) {
|
|
|
+ if (pos != null && !pos.equals(posKey)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ Map<String, String> posConfig = accountConfig.get(posKey);
|
|
|
+ for (String wayKey : posConfig.keySet()) {
|
|
|
+ if (way != null && !way.equals(wayKey)) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+ String planId = posConfig.get(wayKey).trim();
|
|
|
+ if (!planId.isEmpty()) {
|
|
|
+ res.add(planId);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return res;
|
|
|
+ }
|
|
|
+
|
|
|
+ public CommonListDataVO<ProduceContentListItemVO> getProduceContentListByPlanIdList(List<String> planIdList) {
|
|
|
+ CommonListDataVO<ProduceContentListItemVO> result = new CommonListDataVO<>();
|
|
|
+ if (planIdList.isEmpty()) {
|
|
|
+ log.info("getProduceContentListByPlanIdList: planIdList empty");
|
|
|
+ result.setData(new ArrayList<>());
|
|
|
+ result.setTotalCount(0);
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+ int pageSize = 500;
|
|
|
+ List<Integer> produceStatus = Arrays.asList(1,2,3,4,5,6);
|
|
|
+ CommonListDataVO<ProduceContentListItemVO> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
|
|
|
+ int totalCnt = rawData.getTotalCount();
|
|
|
+ int pageNumMax = totalCnt / pageSize;
|
|
|
+ List<ProduceContentListItemVO> allContent = new ArrayList<>();
|
|
|
+ for (int i = 0; i <= pageNumMax; i++) {
|
|
|
+ CommonListDataVO<ProduceContentListItemVO> pageData = aigcProduceContentListService.list(planIdList, i + 1, pageSize, produceStatus);
|
|
|
+ allContent.addAll(pageData.getData());
|
|
|
+ }
|
|
|
+ List<ProduceContentListItemVO> filteredContent = new ArrayList<>();
|
|
|
+ for (ProduceContentListItemVO content : allContent) {
|
|
|
+ if (StringUtils.hasText(content.getTitle())) {
|
|
|
+ filteredContent.add(content);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ result.setData(filteredContent);
|
|
|
+ result.setTotalCount(filteredContent.size());
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ public String getUrlId(String url) {
|
|
|
+ if (url == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ // 检查是否包含特定的前缀
|
|
|
+ if (url.contains("https://mp.weixin.qq.com/s/")) {
|
|
|
+ return url.split("https://mp.weixin.qq.com/s/")[1];
|
|
|
+ }
|
|
|
+ Map<String, String> params = new HashMap<>();
|
|
|
+ try {
|
|
|
+ String pureUrl = URLDecoder.decode(url, "utf-8");
|
|
|
+ // 解析 URL 参数
|
|
|
+ params = parseQueryString(pureUrl);
|
|
|
+
|
|
|
+ String biz = params.get("http://mp.weixin.qq.com/s?__biz");
|
|
|
+ String sn = params.get("sn");
|
|
|
+ String mid = params.get("mid");
|
|
|
+ String idx = params.get("idx");
|
|
|
+ if (biz != null && sn != null && mid != null && idx != null) {
|
|
|
+ return String.format("biz=%s_mid=%s_idx=%s_sn=%s", biz, mid, idx, sn);
|
|
|
+ }
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("get_url_id error: " + url);
|
|
|
+ }
|
|
|
+
|
|
|
+ return params.get("sn");
|
|
|
+ }
|
|
|
+
|
|
|
+ // 辅助方法:解析查询参数
|
|
|
+ private Map<String, String> parseQueryString(String url) {
|
|
|
+ Map<String, String> params = new java.util.HashMap<>();
|
|
|
+ if (url.contains("?")) {
|
|
|
+ String query = url.substring(url.indexOf("?") + 1);
|
|
|
+ for (String param : query.split("&")) {
|
|
|
+ String[] keyValue = param.split("=", 2);
|
|
|
+ if (keyValue.length == 2) {
|
|
|
+ params.put(keyValue[0], keyValue[1]);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return params;
|
|
|
+ }
|
|
|
+
|
|
|
+ public WxContentDetailResponse getArticleDetail(String url) {
|
|
|
+ if (url == null) {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+ try {
|
|
|
+ return wxFetchRemoteService.getContent(url);
|
|
|
+ } catch (Exception e) {
|
|
|
+ log.error("URL error: " + url);
|
|
|
+ }
|
|
|
+ return null;
|
|
|
+ }
|
|
|
+
|
|
|
+ private void saveArticlePoolPromotionSource(String channelContentId, String wxSn, String title, String level) {
|
|
|
+ ArticlePoolPromotionSource articlePromotion = articlePoolPromotionSourceRepository.getByChannelContentId(channelContentId);
|
|
|
+ if (Objects.isNull(articlePromotion)) {
|
|
|
+ articlePromotion = new ArticlePoolPromotionSource();
|
|
|
+ articlePromotion.setChannelContentId(channelContentId);
|
|
|
+ articlePromotion.setTitle(title);
|
|
|
+ articlePromotion.setTitleMd5(Md5Util.encoderByMd5(title));
|
|
|
+ articlePromotion.setCreateTimestamp(System.currentTimeMillis());
|
|
|
+ }
|
|
|
+ articlePromotion.setWxSn(wxSn);
|
|
|
+ articlePromotion.setLevel(level);
|
|
|
+ articlePromotion.setUpdateTimestamp(System.currentTimeMillis());
|
|
|
+ articlePoolPromotionSourceRepository.save(articlePromotion);
|
|
|
+ }
|
|
|
+
|
|
|
+ private void articleAddDependPlan(String produceId, String planId, String planName) {
|
|
|
+ // 获取生产计划的详细信息
|
|
|
+ ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
|
|
|
+ if (detail == null) {
|
|
|
+ log.info("Failed to fetch produce plan detail.");
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ // 获取依赖计划 ID 列表
|
|
|
+ List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
|
|
|
+ List<String> dependPlanIds = new ArrayList<>();
|
|
|
+ for (ProducePlanInputSourceParam inputSource : inputSources) {
|
|
|
+ dependPlanIds.add(inputSource.getInputSourceValue());
|
|
|
+ }
|
|
|
+ // 如果计划 ID 已存在,直接返回
|
|
|
+ if (dependPlanIds.contains(planId)) {
|
|
|
+ log.info("depend_plan_id exist: {}", planId);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+ aigcProducePlanSaveService.save(planName, planId, detail);
|
|
|
+ }
|
|
|
+
|
|
|
+}
|