Browse Source

Merge branch 'wyp/0402-export' of Server/long-article-recommend into master

wangyunpeng 6 months ago
parent
commit
4e3df8437f

+ 3 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -5,6 +5,7 @@ import com.tzld.longarticle.recommend.server.model.dto.GetOffVideos;
 import com.tzld.longarticle.recommend.server.model.dto.LongArticlesVideoDTO;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountCorrelation;
+import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesRootSourceId;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.LongArticlesText;
@@ -46,4 +47,6 @@ public interface CrawlerBaseMapper {
 
     List<AccountAvgInfo> getAllByGhIdIn(Set<String> ghIdList);
 
+    /**
+     * Queries {@code official_articles_v2} for articles whose {@code ghId} is in {@code ghIds}, whose
+     * {@code publish_timestamp} is strictly less than {@code publishTimestamp}, and whose {@code type}
+     * equals {@code type}, as defined by the same-named statement in CrawlerBaseMapper.xml.
+     *
+     * @param ghIds            ghId values to match against the {@code ghId} column
+     * @param publishTimestamp exclusive upper bound applied to {@code publish_timestamp}
+     * @param type             value compared for equality against the {@code type} column
+     * @return the matching rows mapped to {@link Article}
+     */
+    List<Article> getByGhIdInAndPublishTimestampLessThanAndTypeEquals(List<String> ghIds, Long publishTimestamp, String type);
+
 }

+ 5 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/DataDashboardService.java

@@ -19,6 +19,7 @@ import com.tzld.longarticle.recommend.server.common.enums.recommend.ContentPoolE
 import com.tzld.longarticle.recommend.server.common.enums.recommend.RankStrategyEnum;
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.aigc.PublishContentMapper;
+import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.ContentPoolTypeDTO;
 import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
@@ -107,6 +108,8 @@ public class DataDashboardService {
     private LongArticleTitleAuditRepository longArticleTitleAuditRepository;
     @Autowired
     private PublishSingleVideoSourceRepository videoPoolRepository;
+    @Autowired
+    private CrawlerBaseMapper crawlerBaseMapper;
 
     @ApolloJsonValue("${export.account.ghId:[]}")
     private static List<String> ghIdList;
@@ -1100,7 +1103,7 @@ public class DataDashboardService {
                 .collect(Collectors.groupingBy(AccountAvgInfo::getGhId, Collectors.groupingBy(AccountAvgInfo::getUpdateTime,
                         Collectors.toMap(AccountAvgInfo::getPosition, o -> o))));
         List<String> ghIds = accountAvgInfoList.stream().map(AccountAvgInfo::getGhId).distinct().collect(Collectors.toList());
-        List<Article> articleList = articleRepository.getByGhIdInAndPublishTimestampLessThanAndTypeEquals(ghIds, dateEnd, ArticleTypeEnum.QUNFA.getVal());
+        List<Article> articleList = crawlerBaseMapper.getByGhIdInAndPublishTimestampLessThanAndTypeEquals(ghIds, dateEnd, ArticleTypeEnum.QUNFA.getVal());
         Map<String, Article> articleMap = articleList.stream().collect(Collectors.toMap(Article::getWxSn, o -> o));
         List<Article> todayPublish = articleList.stream().filter(o -> o.getPublishTimestamp() > dateStart).collect(Collectors.toList());
         if (CollectionUtils.isEmpty(todayPublish)) {
@@ -1110,7 +1113,7 @@ public class DataDashboardService {
         List<String> wxSnList = articleList.stream().map(Article::getWxSn).distinct().collect(Collectors.toList());
         List<ArticleDetailInfo> articleDetailInfoList = new ArrayList<>();
         for (List<String> partitions : Lists.partition(wxSnList, 1000)) {
-            articleDetailInfoList.addAll(articleDetailInfoRepository.getAllByWxSnIn(partitions));
+            articleDetailInfoList.addAll(crawlerBaseMapper.getAllByWxSnIn(partitions));
         }
         Map<String, List<ArticleDetailInfo>> articleDetailInfoMap = articleDetailInfoList.stream()
                 .collect(Collectors.groupingBy(ArticleDetailInfo::getWxSn));

+ 14 - 1
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -69,7 +69,8 @@
 
     <select id="getAllByWxSnIn"
             resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo">
-        select wx_sn, recall_dt, first_level, fission_0
+        select wx_sn, recall_dt, first_level, fission_0, fission_0_head, fission_0_recommend, fission_1, fission_1_head,
+        fission_1_recommend, fission_2, fission_2_head, fission_2_recommend
         from long_articles_detail_info
         where wx_sn in
         <foreach collection="wxSnList" item="item" separator="," open="(" close=")">
@@ -87,4 +88,16 @@
         </foreach>
     </select>
 
+    <!--
+        Selects rows from official_articles_v2 whose ghId is in the ghIds list, whose publish_timestamp is
+        strictly less than publishTimestamp, and whose type equals the type parameter.
+        An empty or null ghIds list matches no rows (1 = 0); an unguarded foreach would otherwise render
+        the invalid SQL "ghId in ()".
+    -->
+    <select id="getByGhIdInAndPublishTimestampLessThanAndTypeEquals"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.crawler.Article">
+        select ghId, accountName, ItemIndex, title, ContentUrl, wx_sn, publish_timestamp, show_view_count
+        from official_articles_v2
+        where
+        <choose>
+            <when test="ghIds != null and ghIds.size() > 0">
+                ghId in
+                <foreach collection="ghIds" item="item" separator="," open="(" close=")">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                1 = 0
+            </otherwise>
+        </choose>
+        and publish_timestamp &lt; #{publishTimestamp}
+        and type = #{type}
+    </select>
+
 </mapper>