wangyunpeng 7 mesiacov pred
rodič
commit
8637e57ff5

+ 11 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -3,8 +3,10 @@ package com.tzld.longarticle.recommend.server.mapper.crawler;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
 
 import java.util.List;
+import java.util.Set;
 
 public interface CrawlerBaseMapper {
 
@@ -21,4 +23,13 @@ public interface CrawlerBaseMapper {
     void updateArticleSourceRootId(String wxsn, String sourcePublishContentId, String rootPublishContentId, String rootProduceContentId);
 
     List<Article> getWaitingFindArticle(Long timestamp);
+
+    /**
+     * Looks up articles by title MD5, type and status.
+     * <p>
+     * Backed by the mapper statement of the same id, which reads {@code official_articles_v2} and
+     * selects only wx_sn, ghId, appMsgId, title, ItemIndex, publish_timestamp and show_view_count.
+     * <p>
+     * NOTE(review): the mapped statement has no branch for an empty list; the caller in RecallService
+     * passes non-empty batches of at most 1000 values. Confirm before calling with other inputs.
+     *
+     * @param titleMd5s values matched against the {@code title_md5} column with an IN list
+     * @param type      value compared for equality with the {@code Type} column
+     * @param status    value compared for equality with the {@code status} column
+     * @return the matching rows mapped to {@link Article}
+     */
+    List<Article> getByTitleMd5InAndTypeEqualsAndStatusEquals(List<String> titleMd5s, String type, Integer status);
+
+    /**
+     * Loads article detail rows for the given wx_sn values.
+     * <p>
+     * Backed by the mapper statement of the same id, which reads {@code long_articles_detail_info}
+     * and selects only wx_sn, recall_dt, first_level and fission_0.
+     * <p>
+     * NOTE(review): the mapped statement has no branch for an empty list; the caller in RecallService
+     * passes non-empty batches of at most 1000 values. Confirm before calling with other inputs.
+     *
+     * @param wxSnList values matched against the {@code wx_sn} column with an IN list
+     * @return the matching rows mapped to {@link ArticleDetailInfo}
+     */
+    List<ArticleDetailInfo> getAllByWxSnIn(List<String> wxSnList);
+
+    /**
+     * Loads account average info rows for the given gh_id values.
+     * <p>
+     * Backed by the mapper statement of the same id, which reads {@code account_avg_info_v3} and
+     * selects only gh_id, position, update_time, fans and read_avg.
+     *
+     * @param ghIdList values matched against the {@code gh_id} column with an IN list
+     *                 (a {@link Set} despite the parameter name)
+     * @return the matching rows mapped to {@link AccountAvgInfo}
+     */
+    List<AccountAvgInfo> getAllByGhIdIn(Set<String> ghIdList);
+
+    /**
+     * Looks up articles by account, message id, position in the message, type and status.
+     * <p>
+     * Backed by the mapper statement of the same id, which reads {@code official_articles_v2} and
+     * selects only ghId, appMsgId and show_view_count. The two IN lists are applied independently,
+     * so a row matches when its ghId is in {@code ghIdList} and its appMsgId is in {@code appMsgIdList},
+     * regardless of which pairs the caller had in mind.
+     *
+     * @param ghIdList     values matched against the {@code ghId} column with an IN list
+     * @param appMsgIdList values matched against the {@code appMsgId} column with an IN list
+     * @param itemIndex    value compared for equality with the {@code itemIndex} column
+     * @param type         value compared for equality with the {@code Type} column
+     * @param status       value compared for equality with the {@code status} column
+     * @return the matching rows mapped to {@link Article}
+     */
+    List<Article> getByGhIdInAndAppMsgIdInAndItemIndexAndTypeEqualsAndStatusEquals(
+            Set<String> ghIdList, Set<String> appMsgIdList, Integer itemIndex, String type, Integer status);
 }

+ 5 - 5
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/recall/RecallService.java

@@ -214,7 +214,7 @@ public class RecallService implements ApplicationContextAware {
                 content.setCategory(Collections.singletonList(category.getCategory()));
                 continue;
             }
-            log.error("setContentCategory NullError channelContentId:{}", content.getCrawlerChannelContentId());
+//            log.error("setContentCategory NullError channelContentId:{}", content.getCrawlerChannelContentId());
         }
     }
 
@@ -265,7 +265,7 @@ public class RecallService implements ApplicationContextAware {
         List<Article> hisArticleList = new ArrayList<>();
         List<List<String>> titleMd5Partition = Lists.partition(new ArrayList<>(titleMd5List), 1000);
         for (List<String> titleMd5s : titleMd5Partition) {
-            hisArticleList.addAll(articleRepository.getByTitleMd5InAndTypeEqualsAndStatusEquals(titleMd5s, type, 1));
+            hisArticleList.addAll(crawlerBaseMapper.getByTitleMd5InAndTypeEqualsAndStatusEquals(titleMd5s, type, 1));
         }
         Map<String, Map<Integer, List<Article>>> map = hisArticleList.stream()
                 .collect(Collectors.groupingBy(Article::getTitle, Collectors.groupingBy(Article::getItemIndex)));
@@ -273,19 +273,19 @@ public class RecallService implements ApplicationContextAware {
         List<ArticleDetailInfo> articleDetailInfoList = new ArrayList<>();
         List<List<String>> snPartition = Lists.partition(new ArrayList<>(snList), 1000);
         for (List<String> sns : snPartition) {
-            articleDetailInfoList.addAll(articleDetailInfoRepository.getAllByWxSnIn(sns));
+            articleDetailInfoList.addAll(crawlerBaseMapper.getAllByWxSnIn(sns));
         }
         Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
                 .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));
         // 获取历史已发布文章所属头条内容
         Set<String> ghIds = hisArticleList.stream().map(Article::getGhId).collect(Collectors.toSet());
         Set<String> appMsgIds = hisArticleList.stream().map(Article::getAppMsgId).collect(Collectors.toSet());
-        List<Article> firstIndexHisArticleList = articleRepository.getByGhIdInAndAppMsgIdInAndItemIndexAndTypeEqualsAndStatusEquals(
+        List<Article> firstIndexHisArticleList = crawlerBaseMapper.getByGhIdInAndAppMsgIdInAndItemIndexAndTypeEqualsAndStatusEquals(
                 ghIds, appMsgIds, 1, type, 1);
         Map<String, Map<String, Article>> firstIndexHisArticleMap = firstIndexHisArticleList.stream()
                 .collect(Collectors.groupingBy(Article::getGhId, Collectors.toMap(Article::getAppMsgId, o -> o)));
         // 获取发布账号 位置历史均值
-        List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByGhIdIn(ghIds);
+        List<AccountAvgInfo> accountAvgInfoList = crawlerBaseMapper.getAllByGhIdIn(ghIds);
         Map<String, Map<String, Map<String, AccountAvgInfo>>> accountAvgInfoIndexMap = accountAvgInfoList.stream()
                 .filter(o -> Objects.nonNull(o.getReadAvg()) && o.getReadAvg() > 0 && o.getFans() > 1000)
                 .collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.groupingBy(AccountAvgInfo::getUpdateTime,

+ 47 - 0
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -49,4 +49,51 @@
         select * from official_articles_v2 where publish_timestamp > #{timestamp} order by publish_timestamp limit 100
     </select>
 
+    <!--
+        Looks up rows of official_articles_v2 whose title_md5 is in the titleMd5s list and whose
+        Type and status equal the given values. Only the listed columns are selected.
+        NOTE(review): there is no branch for an empty titleMd5s list; the foreach would then render
+        no value list after "in". Confirm that callers always pass at least one value.
+    -->
+    <select id="getByTitleMd5InAndTypeEqualsAndStatusEquals"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.Article">
+        select wx_sn, ghId, appMsgId, title, ItemIndex, publish_timestamp, show_view_count
+        from official_articles_v2
+        where title_md5 in
+        <foreach collection="titleMd5s" item="item" separator="," open="(" close=")">
+            #{item}
+        </foreach>
+        and Type = #{type} and status = #{status}
+    </select>
+
+    <!--
+        Loads rows of long_articles_detail_info whose wx_sn is in the wxSnList list.
+        Only wx_sn, recall_dt, first_level and fission_0 are selected.
+        NOTE(review): there is no branch for an empty wxSnList; the foreach would then render
+        no value list after "in". Confirm that callers always pass at least one value.
+    -->
+    <select id="getAllByWxSnIn"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo">
+        select wx_sn, recall_dt, first_level, fission_0
+        from long_articles_detail_info
+        where wx_sn in
+        <foreach collection="wxSnList" item="item" separator="," open="(" close=")">
+            #{item}
+        </foreach>
+    </select>
+
+    <!--
+        Loads rows of account_avg_info_v3 whose gh_id is in ghIdList.
+        Only gh_id, position, update_time, fans and read_avg are selected.
+        A null or empty ghIdList matches nothing: a foreach over an empty collection renders no
+        value list, which would leave a dangling "in" and make the database reject the statement.
+        Callers that build the id set from earlier query results can legitimately pass an empty set.
+    -->
+    <select id="getAllByGhIdIn"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo">
+        select gh_id, position, update_time, fans, read_avg
+        from account_avg_info_v3
+        <choose>
+            <when test="ghIdList != null and ghIdList.size() > 0">
+                where gh_id in
+                <foreach collection="ghIdList" item="item" separator="," open="(" close=")">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                <!-- no ids requested: return an empty result instead of emitting invalid SQL -->
+                where 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+    <!--
+        Looks up rows of official_articles_v2 whose ghId is in ghIdList, whose appMsgId is in
+        appMsgIdList, and whose itemIndex, Type and status equal the given values.
+        Only ghId, appMsgId and show_view_count are selected. The two IN lists are independent filters.
+        If either list is null or empty the query matches nothing: a foreach over an empty collection
+        renders no value list, which would leave a dangling "in" and make the database reject the
+        statement. Callers that build both sets from earlier query results can pass empty sets.
+    -->
+    <select id="getByGhIdInAndAppMsgIdInAndItemIndexAndTypeEqualsAndStatusEquals"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.Article">
+        select ghId, appMsgId, show_view_count
+        from official_articles_v2
+        <choose>
+            <when test="ghIdList != null and ghIdList.size() > 0 and appMsgIdList != null and appMsgIdList.size() > 0">
+                where ghId in
+                <foreach collection="ghIdList" item="item" separator="," open="(" close=")">
+                    #{item}
+                </foreach>
+                and appMsgId in
+                <foreach collection="appMsgIdList" item="item" separator="," open="(" close=")">
+                    #{item}
+                </foreach>
+                and itemIndex = #{itemIndex}
+                and Type = #{type}
+                and status = #{status}
+            </when>
+            <otherwise>
+                <!-- an empty id list cannot match any row: return an empty result instead of invalid SQL -->
+                where 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+
 </mapper>