Browse Source

Merge branch 'master' into dev-xym-test

xueyiming 7 months ago
parent
commit
9b56421dd2

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/LongArticlesRootSourceIdRepository.java

@@ -10,4 +10,6 @@ import java.util.List;
 /**
  * Spring Data JPA repository for {@link LongArticlesRootSourceId} entities,
  * using a {@code String} identifier.
  *
  * Both methods declare no query of their own here; presumably they are
  * resolved through Spring Data's method-name query derivation (confirm no
  * explicit query is attached elsewhere).
  */
 public interface LongArticlesRootSourceIdRepository extends JpaRepository<LongArticlesRootSourceId, String> {
 
     /**
      * Looks up records by root source id.
      *
      * @param rootSourceIds root source ids to match
      * @return the matching records (presumably empty when nothing matches; confirm)
      */
     List<LongArticlesRootSourceId> getByRootSourceIdIn(List<String> rootSourceIds);
+
     /**
      * Counts records associated with the given trace id. XxlJobService treats
      * a result greater than zero as "already saved" and skips the save.
      *
      * @param traceId trace id to count records for
      * @return number of matching records
      */
+    int countByTraceId(String traceId);
 }

+ 7 - 3
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/XxlJobService.java

@@ -180,7 +180,7 @@ public class XxlJobService {
                     }
                 }
             }
-            saveLongArticlesRootSourceId(saveList);
+            saveLongArticlesRootSourceId(longArticlesVideo.getTraceId(), saveList);
         } catch (DuplicateKeyException e) {
             log.error("Error processCrawlerEachData: {}", JSONObject.toJSONString(longArticlesVideo), e);
             String errMsg = e.getMessage().substring(0, e.getMessage().indexOf("### The error"));
@@ -191,8 +191,12 @@ public class XxlJobService {
         }
     }
 
-    private void saveLongArticlesRootSourceId(List<LongArticlesRootSourceId> saveList) {
+    private void saveLongArticlesRootSourceId(String traceId, List<LongArticlesRootSourceId> saveList) {
         if (CollectionUtil.isNotEmpty(saveList)) {
+            int count = longArticlesRootSourceIdRepository.countByTraceId(traceId);
+            if (count > 0) {
+                return;
+            }
             List<String> rootSourceIds = saveList.stream().map(LongArticlesRootSourceId::getRootSourceId).distinct().collect(Collectors.toList());
             List<LongArticlesRootSourceId> existList = longArticlesRootSourceIdRepository.getByRootSourceIdIn(rootSourceIds);
             for (LongArticlesRootSourceId existItem : existList) {
@@ -250,7 +254,7 @@ public class XxlJobService {
                 saveItem.setVideoId(jsonNode.getLong("videoId"));
                 saveList.add(saveItem);
             }
-            saveLongArticlesRootSourceId(saveList);
+            saveLongArticlesRootSourceId(longArticlesMatchVideo.getTraceId(), saveList);
         } catch (DuplicateKeyException e) {
             log.error("Error processArticleEachData: {}", JSONObject.toJSONString(longArticlesMatchVideo), e);
             String errMsg = e.getMessage().substring(0, e.getMessage().indexOf("### The error"));

+ 83 - 66
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticlePromotionService.java

@@ -2,6 +2,7 @@ package com.tzld.longarticle.recommend.server.service.recommend;
 
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.longarticle.recommend.server.common.enums.aigc.CrawlerModeEnum;
+import com.tzld.longarticle.recommend.server.common.enums.recommend.FeishuRobotIdEnum;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
@@ -23,6 +24,7 @@ import com.tzld.longarticle.recommend.server.repository.longArticle.DatastatSort
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.Md5Util;
 import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
+import com.tzld.longarticle.recommend.server.util.feishu.FeishuMessageSender;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -116,59 +118,66 @@ public class ArticlePromotionService {
             aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag, CrawlerModeEnum.ContentIDs.getVal());
             return;
         }
-        String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
-        List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
-        // 获取已访问的标题和URL
-        List<String> visitedTitleList = contentList.stream()
-                .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
-                .distinct().collect(Collectors.toList());
-        Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
-                .collect(Collectors.toSet());
-        // 筛选URL和标题
-        List<String> publishContentIds = new ArrayList<>();
-        List<String> filterUrlList = new ArrayList<>();
-        for (DatastatSortStrategy item : list) {
-            String url = item.getLink();
-            String urlId = getUrlId(item.getLink());
-            String title = item.getTitle();
-            String wxSn = item.getWxSn();
-            if (visitedUrlIdList.contains(urlId)) {
-                continue;
-            }
-            if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
-                continue;
-            }
-            filterUrlList.add(url);
-            // 调用爬虫 detail 接口并保存数据
-            WxContentDetailResponse detail = getArticleDetail(url);
-            String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
-            if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
-                saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
-            } else {
-                String publishContentId = articleService.getPublishContentByWxSn(wxSn);
-                if (StringUtils.hasText(publishContentId)) {
-                    publishContentIds.add(publishContentId);
-                    saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
+        try {
+            String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
+            List<ProduceContentListItemVO> contentList = getProduceContentList(accountNickName, pos, way);
+            // 获取已访问的标题和URL
+            List<String> visitedTitleList = contentList.stream()
+                    .flatMap(content -> Arrays.stream(new String[]{content.getReferContentTitle(), content.getTitle()}))
+                    .distinct().collect(Collectors.toList());
+            Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
+                    .collect(Collectors.toSet());
+            // 筛选URL和标题
+            List<String> publishContentIds = new ArrayList<>();
+            List<String> filterUrlList = new ArrayList<>();
+            for (DatastatSortStrategy item : list) {
+                String url = item.getLink();
+                String urlId = getUrlId(item.getLink());
+                String title = item.getTitle();
+                String wxSn = item.getWxSn();
+                if (visitedUrlIdList.contains(urlId)) {
+                    continue;
+                }
+                if (TitleSimilarCheckUtil.isDuplicateContent(title, visitedTitleList, TitleSimilarCheckUtil.ARTICLE_PROMOTION_THRESHOLD)) {
+                    continue;
+                }
+                filterUrlList.add(url);
+                // 调用爬虫 detail 接口并保存数据
+                WxContentDetailResponse detail = getArticleDetail(url);
+                String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
+                if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
+                    saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
+                } else {
+                    String publishContentId = articleService.getPublishContentByWxSn(wxSn);
+                    if (StringUtils.hasText(publishContentId)) {
+                        publishContentIds.add(publishContentId);
+                        saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
+                    }
                 }
             }
-        }
-        if (filterUrlList.isEmpty()) {
-            log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
-            return;
-        }
-        int urlLen = filterUrlList.size();
-        String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
-        log.info("url_len: " + list.size() + ", " + urlLen);
-        IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
-        if (StringUtils.hasText(produceId)) {
-            articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
-        }
-        log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
-        if (CollectionUtils.isNotEmpty(publishContentIds)) {
-            planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
+            if (filterUrlList.isEmpty()) {
+                log.info("url_list empty: " + accountNickName + ", " + pos + ", " + way);
+                return;
+            }
+            int urlLen = filterUrlList.size();
+            String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
+            log.info("url_len: " + list.size() + ", " + urlLen);
+            IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
             if (StringUtils.hasText(produceId)) {
                 articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
             }
+            log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
+            if (CollectionUtils.isNotEmpty(publishContentIds)) {
+                planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
+                if (StringUtils.hasText(produceId)) {
+                    articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
+                }
+            }
+        } catch (Exception e) {
+            log.error("articlePromotion error: ", e);
+            FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
+                    "文章晋升ERROR:\n" +
+                            "articlePromotion error: " + e.getMessage());
         }
     }
 
@@ -211,7 +220,7 @@ public class ArticlePromotionService {
             return result;
         }
         int pageSize = 500;
-        List<Integer> produceStatus = Arrays.asList(1,2,3,4,5,6);
+        List<Integer> produceStatus = Arrays.asList(1, 2, 3, 4, 5, 6);
         CommonListDataVO<ProduceContentListItemVO> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
         int totalCnt = rawData.getTotalCount();
         int pageNumMax = totalCnt / pageSize;
@@ -302,24 +311,32 @@ public class ArticlePromotionService {
     }
 
     /**
      * Adds a crawler plan as a dependency (input source) of a produce plan.
      *
      * Fetches the produce plan detail, collects the input source values of its
      * first input source group, and saves the plan only when {@code planId} is
      * not already among them. Failures never propagate to the caller: they are
      * logged and reported through the Feishu JOB robot.
      *
      * @param produceId id of the produce plan to extend
      * @param planId    id of the crawler plan to add as a dependency
      * @param planName  name of the crawler plan, passed through to the save call
      */
     private void articleAddDependPlan(String produceId, String planId, String planName) {
-        // 获取生产计划的详细信息
-        ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
-        if (detail == null) {
-            log.info("Failed to fetch produce plan detail.");
-            return;
-        }
-        // 获取依赖计划 ID 列表
-        List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
-        List<String> dependPlanIds = new ArrayList<>();
-        for (ProducePlanInputSourceParam inputSource : inputSources) {
-            dependPlanIds.add(inputSource.getInputSourceValue());
-        }
-        // 如果计划 ID 已存在,直接返回
-        if (dependPlanIds.contains(planId)) {
-            log.info("depend_plan_id exist: {}", planId);
-            return;
+        try {
+            // Fetch the produce plan detail
+            ProducePlanDetailVO detail = aigcProducePlanDetailService.articleGetProducePlanDetail(produceId);
+            if (detail == null) {
+                // Keep a local log entry as well; the alert alone leaves no trace in the service logs
+                log.error("articleAddDependPlan getProducePlan error: produceId={}", produceId);
+                FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
+                        "文章晋升ERROR:\n" +
+                                "articleAddDependPlan getProducePlan error: planId:" + produceId);
+                return;
+            }
+            // Collect the ids of the plans this produce plan already depends on
+            List<ProducePlanInputSourceParam> inputSources = detail.getInputSourceGroups().get(0).getInputSources();
+            List<String> dependPlanIds = new ArrayList<>();
+            for (ProducePlanInputSourceParam inputSource : inputSources) {
+                dependPlanIds.add(inputSource.getInputSourceValue());
+            }
+            // Nothing to do when the plan id is already a dependency
+            if (dependPlanIds.contains(planId)) {
+                log.info("depend_plan_id exist: {}", planId);
+                return;
+            }
+            aigcProducePlanSaveService.save(planName, planId, detail);
+        } catch (Exception e) {
+            // Log with the stack trace: the alert below carries only e.getMessage(),
+            // which may be null and is not enough to diagnose the failure
+            log.error("articleAddDependPlan error: produceId={}, planId={}", produceId, planId, e);
+            FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.JOB.getRobotId(),
+                    "文章晋升ERROR:\n" +
+                            "articleAddDependPlan error: " + e.getMessage());
         }
-        aigcProducePlanSaveService.save(planName, planId, detail);
     }
 
 }

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -154,6 +154,7 @@ public class RecallService implements ApplicationContextAware {
         if (CollectionUtils.isEmpty(content)) {
             FeishuMessageSender.sendWebHookMessage(FeishuRobotIdEnum.RECOMMEND.getRobotId(),
                     "内容召回失败\n"
+                            + "planId: " + param.getPlanId() + "\n"
                             + "ghId: " + param.getGhId() + "\n"
                             + "账号名称: " + param.getAccountName());
             return content;