wangyunpeng 7 mesiacov pred
rodič
commit
a7dfc3f99a

+ 47 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/common/enums/aigc/CrawlerModeEnum.java

@@ -0,0 +1,47 @@
+package com.tzld.longarticle.recommend.server.common.enums.aigc;
+
+import lombok.Getter;
+
+/**
+ * Crawl mode: a numeric code plus a description (author: TanJingyu, created: 2023-10-09 16:39:29).
+ */
+@Getter
+public enum CrawlerModeEnum {
+    DesignatedBlogger(1, "指定博主"),
+    CategoryBlogger(2, "分类博主"),
+    RecommendedBlogger(3, "推荐流博主"),
+    Account(4, "账号"),
+    ContentIDs(5, "内容IDs"),
+    KeywordSearch(6, "关键词搜索"),
+    HotList(7, "热榜"),
+    Recommended(8, "推荐"),
+    ImageSearch(9, "图片搜索"),
+    AlgorithmRecommended(10, "算法推荐"),
+    BloggerList(11, "博主榜单"),
+    RankList(12, "榜单"),
+    RelRecommended(13, "相关推荐"),
+
+    PublishContentIds(101, "Aigc发布内容Id"),
+
+    Other(999, "其他");
+
+    /** Numeric code of this mode; the key used by {@link #from(int)}. */
+    private final int val;
+    /** Human-readable description of this mode. */
+    private final String description;
+
+    CrawlerModeEnum(int val, String description) {
+        this.description = description;
+        this.val = val;
+    }
+
+    /** Returns the constant whose code equals {@code val}, or {@link #Other} when no constant matches. */
+    public static CrawlerModeEnum from(int val) {
+        for (CrawlerModeEnum mode : values()) {
+            if (mode.val == val) {
+                return mode;
+            }
+        }
+        return Other;
+    }
+}

+ 15 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/aigc/CommonListDataVO.java

@@ -0,0 +1,15 @@
+package com.tzld.longarticle.recommend.server.model.vo.aigc;
+
+import lombok.Getter;
+import lombok.Setter;
+import lombok.experimental.Accessors;
+
+import java.util.List;
+
+@Getter
+@Setter
+@Accessors(chain = true) // Lombok: generated setters return this, so calls can be chained
+public class CommonListDataVO<T> { // generic list result: a count plus the items of type T
+    private Integer totalCount; // count reported with the list; boxed, so it is null until set
+    private List<T> data; // the items themselves; null until set
+}

+ 4 - 3
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/remote/aigc/AIGCCrawlerPlanSaveService.java

@@ -24,9 +24,9 @@ public class AIGCCrawlerPlanSaveService {
 
     private final CloseableHttpClient client = HttpPoolFactory.aigcPool();
 
-    public IdNameVO<String> createArticleUrlPlan(String planName, List<String> urlList, String tag) {
+    public IdNameVO<String> createArticleUrlPlan(String planName, List<String> contentList, String tag, Integer crawlerMode) {
         IdNameVO<String> result = new IdNameVO<>();
-        if (urlList == null || urlList.isEmpty()) {
+        if (contentList == null || contentList.isEmpty()) {
             return null;
         }
         // 从配置中读取模板文件路径
@@ -38,10 +38,11 @@ public class AIGCCrawlerPlanSaveService {
         // 设置请求参数
         JSONObject params = data.getJSONObject("params");
         params.put("name", planName);
+        params.put("crawlerMode", crawlerMode);
         if (tag != null) {
             params.put("planTag", tag);
         }
-        params.put("inputModeValues", urlList);
+        params.put("inputModeValues", contentList);
         // 将数据转为 JSON 格式
         String requestData = data.toString();
         try {

+ 12 - 8
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/remote/aigc/AIGCProduceContentListService.java

@@ -1,8 +1,10 @@
 package com.tzld.longarticle.recommend.server.remote.aigc;
 
 import cn.hutool.core.io.resource.ResourceUtil;
+import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
 import com.tzld.longarticle.recommend.server.common.HttpPoolFactory;
+import com.tzld.longarticle.recommend.server.model.vo.aigc.CommonListDataVO;
 import com.tzld.longarticle.recommend.server.model.vo.aigc.ProduceContentListItemVO;
 import lombok.extern.slf4j.Slf4j;
 import org.apache.http.HttpEntity;
@@ -24,10 +26,10 @@ public class AIGCProduceContentListService {
 
     private final CloseableHttpClient client = HttpPoolFactory.aigcPool();
 
-    public Map<String, Object> list(List<String> planIdList, int pageNum, int pageSize, List<Integer> produceStatus) {
-        Map<String, Object> result = new HashMap<>();
-        result.put("contentList", new ArrayList<>());
-        result.put("totalCnt", 0);
+    public CommonListDataVO<ProduceContentListItemVO> list(List<String> planIdList, int pageNum, int pageSize, List<Integer> produceStatus) {
+        CommonListDataVO<ProduceContentListItemVO> result = new CommonListDataVO<ProduceContentListItemVO>();
+        result.setData(new ArrayList<>());
+        result.setTotalCount(0);
         if (planIdList.isEmpty()) {
             System.out.println("getProduceContentListByPlanIdListRaw: planIdList empty");
             return result;
@@ -59,10 +61,12 @@ public class AIGCProduceContentListService {
                     String responseBody = EntityUtils.toString(responseEntity, "UTF-8");
                     JSONObject jsonObject = JSONObject.parseObject(responseBody);
                     if (jsonObject.getInteger("code") == 0) {
-                        int totalCnt = jsonObject.getJSONObject("data").getInteger("totalCount");
-                        List<ProduceContentListItemVO> contentList = jsonObject.getJSONObject("data").parseArray("data", ProduceContentListItemVO.class);
-                        result.put("contentList", contentList);
-                        result.put("totalCnt", totalCnt);
+                        JSONObject dataObj = jsonObject.getJSONObject("data");
+                        int totalCnt = dataObj.getInteger("totalCount");
+                        JSONArray dataArray = dataObj.getJSONArray("data");
+                        List<ProduceContentListItemVO> contentList = dataArray.toJavaList(ProduceContentListItemVO.class);
+                        result.setData(contentList);
+                        result.setTotalCount(totalCnt);
                         return result;
                     }
                 }

+ 46 - 27
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticlePromotionService.java

@@ -1,12 +1,14 @@
 package com.tzld.longarticle.recommend.server.service.recommend;
 
 import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
+import com.tzld.longarticle.recommend.server.common.enums.aigc.CrawlerModeEnum;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.entity.aigc.PublishAccount;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import com.tzld.longarticle.recommend.server.model.vo.IdNameVO;
 import com.tzld.longarticle.recommend.server.model.vo.WxContentDetailResponse;
+import com.tzld.longarticle.recommend.server.model.vo.aigc.CommonListDataVO;
 import com.tzld.longarticle.recommend.server.model.vo.aigc.ProduceContentListItemVO;
 import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanDetailVO;
 import com.tzld.longarticle.recommend.server.model.vo.aigc.ProducePlanInputSourceParam;
@@ -22,6 +24,7 @@ import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.Md5Util;
 import com.tzld.longarticle.recommend.server.util.TitleSimilarCheckUtil;
 import lombok.extern.slf4j.Slf4j;
+import org.apache.commons.collections4.CollectionUtils;
 import org.springframework.beans.factory.annotation.Autowired;
 import org.springframework.stereotype.Service;
 import org.springframework.util.StringUtils;
@@ -52,6 +55,8 @@ public class ArticlePromotionService {
     WxFetchRemoteService wxFetchRemoteService;
     @Autowired
     PublishAccountRepository publishAccountRepository;
+    @Autowired
+    ArticleService articleService;
 
     @ApolloJsonValue("${articlePromotionProduceConfig:{}}")
     private Map<String, Map<String, Map<String, String>>> produceConfig;
@@ -108,7 +113,7 @@ public class ArticlePromotionService {
         if (!produceConfig.containsKey(accountNickName)) {
             log.info("account_nickname not in produceConfig: " + accountNickName);
             String planName = String.format("%d_%s_%s_%s【%s】_%s", list.size(), today, accountNickName, pos, way, today);
-            aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag);
+            aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, urlList, tag, CrawlerModeEnum.ContentIDs.getVal());
             return;
         }
         String produceId = produceConfig.get(accountNickName).get(pos).get(way).trim();
@@ -119,8 +124,8 @@ public class ArticlePromotionService {
                 .distinct().collect(Collectors.toList());
         Set<String> visitedUrlIdList = contentList.stream().map(content -> getUrlId(content.getReferContentLink()))
                 .collect(Collectors.toSet());
-
         // 筛选URL和标题
+        List<String> publishContentIds = new ArrayList<>();
         List<String> filterUrlList = new ArrayList<>();
         for (DatastatSortStrategy item : list) {
             String url = item.getLink();
@@ -136,9 +141,15 @@ public class ArticlePromotionService {
             filterUrlList.add(url);
             // 调用爬虫 detail 接口并保存数据
             WxContentDetailResponse detail = getArticleDetail(url);
-            if (detail != null) {
-                String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
+            String level = pos.equals("【1】") ? contentPoolType.get(0) : contentPoolType.get(1);
+            if (detail != null && StringUtils.hasText(detail.getChannelContentId())) {
                 saveArticlePoolPromotionSource(detail.getChannelContentId(), wxSn, title, level);
+            } else {
+                String publishContentId = articleService.getPublishContentByWxSn(wxSn);
+                if (StringUtils.hasText(publishContentId)) {
+                    publishContentIds.add(publishContentId);
+                    saveArticlePoolPromotionSource(Md5Util.encoderByMd5(publishContentId), wxSn, title, level);
+                }
             }
         }
         if (filterUrlList.isEmpty()) {
@@ -148,18 +159,23 @@ public class ArticlePromotionService {
         int urlLen = filterUrlList.size();
         String planName = String.format("%d_%s_%s_%s【%s】_%s", urlLen, today, accountNickName, pos, way, today);
         log.info("url_len: " + list.size() + ", " + urlLen);
-        IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag);
-        if (produceId.isEmpty()) {
-            log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
-            return;
+        IdNameVO<String> planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, filterUrlList, tag, CrawlerModeEnum.ContentIDs.getVal());
+        if (StringUtils.hasText(produceId)) {
+            articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
+        }
+        log.info("{}, {}, produce plan not exist: {}, {}, {}", planInfo.getName(), planInfo.getId(), accountNickName, pos, way);
+        if (CollectionUtils.isNotEmpty(publishContentIds)) {
+            planInfo = aigcCrawlerPlanSaveService.createArticleUrlPlan(planName, publishContentIds, tag, CrawlerModeEnum.PublishContentIds.getVal());
+            if (StringUtils.hasText(produceId)) {
+                articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
+            }
         }
-        articleAddDependPlan(produceId, planInfo.getId(), planInfo.getName());
     }
 
     private List<ProduceContentListItemVO> getProduceContentList(String accountNickName, String pos, String way) {
         List<String> planIdList = getProducePlanIdList(accountNickName, pos, way);
-        Map<String, Object> contentData = getProduceContentListByPlanIdList(planIdList);
-        return (List<ProduceContentListItemVO>) contentData.get("contentList");
+        CommonListDataVO<ProduceContentListItemVO> contentData = getProduceContentListByPlanIdList(planIdList); // pages through the content of the resolved plans
+        return contentData.getData(); // only the item list is returned; totalCount is not used here
     }
 
     public List<String> getProducePlanIdList(String accountNickname, String pos, String way) {
@@ -186,23 +202,23 @@ public class ArticlePromotionService {
         return res;
     }
 
-    public Map<String, Object> getProduceContentListByPlanIdList(List<String> planIdList) {
-        Map<String, Object> result = new HashMap<>();
+    public CommonListDataVO<ProduceContentListItemVO> getProduceContentListByPlanIdList(List<String> planIdList) {
+        CommonListDataVO<ProduceContentListItemVO> result = new CommonListDataVO<>();
         if (planIdList.isEmpty()) {
             System.out.println("getProduceContentListByPlanIdList: planIdList empty");
-            result.put("contentList", new ArrayList<>());
-            result.put("totalCnt", 0);
+            result.setData(new ArrayList<>());
+            result.setTotalCount(0);
             return result;
         }
         int pageSize = 500;
         List<Integer> produceStatus = Arrays.asList(1,2,3,4,5,6);
-        Map<String, Object> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
-        int totalCnt = (int) rawData.get("totalCnt");
+        CommonListDataVO<ProduceContentListItemVO> rawData = aigcProduceContentListService.list(planIdList, 1, 1, produceStatus);
+        int totalCnt = rawData.getTotalCount();
         int pageNumMax = totalCnt / pageSize;
         List<ProduceContentListItemVO> allContent = new ArrayList<>();
         for (int i = 0; i <= pageNumMax; i++) {
-            Map<String, Object> pageData = aigcProduceContentListService.list(planIdList, i + 1, pageSize, produceStatus);
-            allContent.addAll((Collection<? extends ProduceContentListItemVO>) pageData.get("contentList"));
+            CommonListDataVO<ProduceContentListItemVO> pageData = aigcProduceContentListService.list(planIdList, i + 1, pageSize, produceStatus);
+            allContent.addAll(pageData.getData());
         }
         List<ProduceContentListItemVO> filteredContent = new ArrayList<>();
         for (ProduceContentListItemVO content : allContent) {
@@ -210,8 +226,8 @@ public class ArticlePromotionService {
                 filteredContent.add(content);
             }
         }
-        result.put("contentList", filteredContent);
-        result.put("totalCnt", filteredContent.size());
+        result.setData(filteredContent);
+        result.setTotalCount(filteredContent.size());
         return result;
     }
 
@@ -245,7 +261,6 @@ public class ArticlePromotionService {
 
     // 辅助方法:解析查询参数
     private Map<String, String> parseQueryString(String url) {
-        // 示例解析逻辑
         Map<String, String> params = new java.util.HashMap<>();
         if (url.contains("?")) {
             String query = url.substring(url.indexOf("?") + 1);
@@ -272,13 +287,17 @@ public class ArticlePromotionService {
     }
 
     private void saveArticlePoolPromotionSource(String channelContentId, String wxSn, String title, String level) {
-        ArticlePoolPromotionSource articlePromotion = new ArticlePoolPromotionSource();
-        articlePromotion.setChannelContentId(channelContentId);
+        ArticlePoolPromotionSource articlePromotion = articlePoolPromotionSourceRepository.getByChannelContentId(channelContentId); // reuse the stored record for this channelContentId when one exists
+        if (Objects.isNull(articlePromotion)) { // nothing stored yet: create the record; channelContentId, title, titleMd5 and createTimestamp are set only in this branch
+            articlePromotion = new ArticlePoolPromotionSource();
+            articlePromotion.setChannelContentId(channelContentId);
+            articlePromotion.setTitle(title);
+            articlePromotion.setTitleMd5(Md5Util.encoderByMd5(title));
+            articlePromotion.setCreateTimestamp(System.currentTimeMillis());
+        }
         articlePromotion.setWxSn(wxSn);
-        articlePromotion.setTitle(title);
-        articlePromotion.setTitleMd5(Md5Util.encoderByMd5(title));
         articlePromotion.setLevel(level);
-        articlePromotion.setCreateTimestamp(System.currentTimeMillis());
+        articlePromotion.setUpdateTimestamp(System.currentTimeMillis()); // wxSn, level and updateTimestamp are overwritten on every call, for new and existing records alike
         articlePoolPromotionSourceRepository.save(articlePromotion);
     }
 

+ 15 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -524,4 +524,19 @@ public class ArticleService {
             articleCategoryRepository.save(articleCategory);
         }
     }
+
+    /**
+     * Looks up the id of the publish content that corresponds to the article with the given wxSn.
+     *
+     * @param wxSn wxSn of the article to resolve
+     * @return the id of the matched publish content, or {@code null} when the article, its publish
+     *         account, or a matching publish content cannot be found
+     */
+    public String getPublishContentByWxSn(String wxSn) {
+        Article article = articleRepository.getByWxSn(wxSn);
+        if (Objects.isNull(article)) {
+            // No article for this wxSn: report "not found" instead of dereferencing null below.
+            return null;
+        }
+        PublishAccount publishAccount = publishAccountRepository.getByGhId(article.getGhId());
+        if (Objects.isNull(publishAccount)) {
+            return null;
+        }
+        // Use the publish timestamp when it is positive, otherwise the update time; both are scaled
+        // by 1000 (presumably seconds to milliseconds - confirm against the Article entity).
+        long publishTimestamp = article.getPublishTimestamp() > 0 ? article.getPublishTimestamp() * 1000 : article.getUpdateTime() * 1000;
+        // NOTE(review): the trailing 100 looks like a row limit for the candidate query - confirm in the mapper.
+        List<PublishContent> publishContentList = aigcBaseMapper.getNearestPublishContent(publishAccount.getId(), publishTimestamp, 100);
+        PublishContent publishContent = findPublishContent(publishContentList, article.getTitle(), publishTimestamp);
+        return Objects.isNull(publishContent) ? null : publishContent.getId();
+    }
 }

+ 1 - 1
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -93,7 +93,7 @@
         select *
         from datastat_sort_strategy
         where view_count >= #{viewCount}
-        and avg_view_count >= #{viewCountRate}
+        and read_rate >= #{viewCountRate}
         and fans > #{fans}
         and date_str > #{dateStr}
         and position in