Explorar o código

Merge branch 'wyp/1106-datastat-score-db' of Server/long-article-recommend into master

wangyunpeng hai 10 meses
pai
achega
a73a65ab6e

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -1,5 +1,6 @@
 package com.tzld.longarticle.recommend.server.mapper.longArticle;
 
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatScore;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import org.apache.ibatis.annotations.Mapper;
 
@@ -14,4 +15,5 @@ public interface LongArticleBaseMapper {
 
     void batchInsertDatastatSortStrategy(List<DatastatSortStrategy> list);
 
+    void batchInsertDatastatScore(List<DatastatScore> list);
 }

+ 68 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/entity/longArticle/DatastatScore.java

@@ -0,0 +1,68 @@
+package com.tzld.longarticle.recommend.server.model.entity.longArticle;
+
+import lombok.AllArgsConstructor;
+import lombok.Data;
+import lombok.NoArgsConstructor;
+
+import javax.persistence.Column;
+import javax.persistence.Entity;
+import javax.persistence.Id;
+import javax.persistence.Table;
+
+@Data // Lombok: generates getters, setters, equals/hashCode and toString for every field
+@AllArgsConstructor // constructor parameters follow field declaration order, so do not reorder fields
+@NoArgsConstructor
+@Entity
+@Table(name = "datastat_score")
+public class DatastatScore { // JPA entity for one row of the datastat_score table
+
+    @Id
+    @Column(name = "id")
+    private Long id; // primary key; no @GeneratedValue is declared, and the MyBatis batch insert omits this column
+
+    @Column(name = "dt")
+    private String dt; // date string; filled from FirstContentScoreExport.dateStr and used by deleteByDtIn
+
+    @Column(name = "gh_id")
+    private String ghId; // account identifier copied from the export row
+
+    @Column(name = "index") // NOTE(review): "index" is a MySQL reserved word (the mapper XML backtick-quotes it); confirm Hibernate-generated SQL is safe
+    private Integer index; // copied from FirstContentScoreExport.index
+
+    @Column(name = "account_name")
+    private String accountName;
+
+    @Column(name = "title")
+    private String title;
+
+    @Column(name = "strategy")
+    private String strategy;
+
+    @Column(name = "score")
+    private Double score; // overall score copied from the export row
+
+    @Column(name = "similarity")
+    private Double similarity; // filled from the export's similarityStrategy value
+
+    @Column(name = "view_count_rate")
+    private Double viewCountRate; // filled from the export's viewCountRateStrategy value
+
+    @Column(name = "his_fission_avg_read_rate_rate")
+    private Double hisFissionAvgReadRateRate; // filled from the export's hisFissionAvgReadRateRateStrategy value
+
+    @Column(name = "his_fission_avg_read_sum_rate")
+    private Double hisFissionAvgReadSumRate; // filled from the export's hisFissionAvgReadSumRateStrategy value
+
+    @Column(name = "his_fission_de_weight_avg_read_sum_rate")
+    private Double hisFissionDeWeightAvgReadSumRate; // filled from the export's hisFissionDeWeightAvgReadSumRateStrategy value
+
+    @Column(name = "read_count")
+    private Integer readCount; // filled from the export's readCount
+
+    @Column(name = "read_avg")
+    private Double readAvg; // filled from the export's readAvg
+
+    @Column(name = "read_avg_rate")
+    private Double readAvgRate; // filled from the export's readAvgRate
+
+}

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/vo/FirstContentScoreExport.java

@@ -8,6 +8,7 @@ public class FirstContentScoreExport {
     private String dateStr;
     private String ghId;
     private String accountName;
+    private Integer index;
     private String title;
     private String strategy;
 

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/crawler/PublishSortLogRepository.java

@@ -25,4 +25,5 @@ public interface PublishSortLogRepository extends JpaRepository<PublishSortLog,
 
     List<PublishSortLog> findByDateStrIn(List<String> dateStrList);
 
+    List<PublishSortLog> findByDateStrAndGhId(String dateStr, String ghId);
 }

+ 12 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/DatastatScoreRepository.java

@@ -0,0 +1,12 @@
+package com.tzld.longarticle.recommend.server.repository.longArticle;
+
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatScore;
+import org.springframework.data.jpa.repository.JpaRepository;
+import org.springframework.stereotype.Repository;
+
+import java.util.List;
+
+@Repository
+public interface DatastatScoreRepository extends JpaRepository<DatastatScore, Long> { // Spring Data JPA repository for DatastatScore rows keyed by Long id
+    void deleteByDtIn(List<String> dateStrList); // derived delete: removes rows whose dt is in the list. NOTE(review): derived deletes need an active transaction; confirm callers provide one
+}

+ 63 - 18
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/DataDashboardService.java

@@ -16,6 +16,7 @@ import com.tzld.longarticle.recommend.server.model.entity.crawler.AccountAvgInfo
 import com.tzld.longarticle.recommend.server.model.entity.crawler.Article;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.ArticleDetailInfo;
 import com.tzld.longarticle.recommend.server.model.entity.crawler.PublishSortLog;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatScore;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.DatastatSortStrategy;
 import com.tzld.longarticle.recommend.server.model.param.MiniprogramTaskParam;
 import com.tzld.longarticle.recommend.server.model.param.PublishContentParam;
@@ -28,6 +29,7 @@ import com.tzld.longarticle.recommend.server.repository.crawler.AccountAvgInfoRe
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleDetailInfoRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.ArticleRepository;
 import com.tzld.longarticle.recommend.server.repository.crawler.PublishSortLogRepository;
+import com.tzld.longarticle.recommend.server.repository.longArticle.DatastatScoreRepository;
 import com.tzld.longarticle.recommend.server.service.recommend.score.ScoreStrategy;
 import com.tzld.longarticle.recommend.server.util.DateUtils;
 import com.tzld.longarticle.recommend.server.util.MapBuilder;
@@ -75,6 +77,8 @@ public class DataDashboardService {
     private ProducePlanRepository producePlanRepository;
     @Autowired
     private ProducePlanInputSourceRepository producePlanInputSourceRepository;
+    @Autowired
+    private DatastatScoreRepository datastatScoreRepository;
 
     @ApolloJsonValue("${export.account.ghId:[]}")
     private static List<String> ghIdList;
@@ -1347,7 +1351,6 @@ public class DataDashboardService {
 
         List<Pair<String, String>> styles = Arrays
                 .asList(
-                        Pair.of("F", "#,##0.00"),
                         Pair.of("G", "#,##0.00"),
                         Pair.of("H", "#,##0.00"),
                         Pair.of("I", "#,##0.00"),
@@ -1359,7 +1362,8 @@ public class DataDashboardService {
                         Pair.of("O", "#,##0.00"),
                         Pair.of("P", "#,##0.00"),
                         Pair.of("Q", "#,##0.00"),
-                        Pair.of("T", "#,##0.00")
+                        Pair.of("R", "#,##0.00"),
+                        Pair.of("U", "#,##0.00")
                 );
 
         doSendFeishuSheet(dateStrList, sheetToken, sheetId, rowNum, rows, 2, styles);
@@ -1368,16 +1372,16 @@ public class DataDashboardService {
     private List<FirstContentScoreExport> firstContentScoreData(List<String> dateStrList) {
         List<FirstContentScoreExport> result = new ArrayList<>();
         List<PublishSortLog> sortLogList = publishSortLogRepository.findByDateStrIn(dateStrList);
-        sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
+        sortLogList = sortLogList.stream().filter(o -> o.getIndex() == 1 || o.getIndex() == 2).collect(Collectors.toList());
         sortLogList.sort(Comparator.comparing(PublishSortLog::getGhId).thenComparing(PublishSortLog::getDateStr));
         List<String> ghIds = sortLogList.stream().map(PublishSortLog::getGhId).distinct().collect(Collectors.toList());
         long minTimestamp = DateUtils.dateStrToTimestamp(dateStrList.get(0), "yyyyMMdd");
         List<Article> articleList = articleRepository.getByGhIdInAndUpdateTimeGreaterThanAndTypeEquals(ghIds, minTimestamp, ArticleTypeEnum.QUNFA.getVal());
-        articleList = articleList.stream().filter(o -> o.getItemIndex() == 1).collect(Collectors.toList());
-        Map<String, Map<String, Article>> articleMap = articleList.stream().collect(
-                Collectors.groupingBy(Article::getGhId, Collectors.toMap(
-                        o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"), o -> o,
-                        (existing, replacement) -> replacement)));
+        articleList = articleList.stream().filter(o -> o.getItemIndex() == 1 || o.getItemIndex() == 2).collect(Collectors.toList());
+        Map<String, Map<String, Map<Integer, Article>>> articleMap = articleList.stream().collect(
+                Collectors.groupingBy(Article::getGhId, Collectors.groupingBy(o -> DateUtils.timestampToYMDStr(o.getUpdateTime(), "yyyyMMdd"),
+                        Collectors.toMap(Article::getItemIndex, o -> o,
+                                (existing, replacement) -> replacement))));
         List<String> titleList = articleList.stream().map(Article::getTitle).distinct().collect(Collectors.toList());
         List<PublishContent> hisPublishList = new ArrayList<>();
         for (List<String> partitions : Lists.partition(new ArrayList<>(titleList), 100)) {
@@ -1386,17 +1390,23 @@ public class DataDashboardService {
         Map<String, List<PublishContent>> hisPublishMap = hisPublishList.stream().collect(Collectors.groupingBy(PublishContent::getTitle));
         String ymd = DateUtils.timestampToYMDStr(minTimestamp - 86400 * 7, "yyyy-MM-dd");
         List<AccountAvgInfo> accountAvgInfoList = accountAvgInfoRepository.getAllByUpdateTimeGreaterThanEqual(ymd);
-        Map<String, Map<String, AccountAvgInfo>> accountAvgInfoMap = accountAvgInfoList.stream()
-                .filter(o -> Objects.equals(o.getPosition(), "1")).collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
-                        Collectors.toMap(AccountAvgInfo::getUpdateTime, o -> o)));
+        Map<String, Map<String, Map<String, AccountAvgInfo>>> accountAvgInfoMap = accountAvgInfoList.stream()
+                .filter(o -> Objects.equals(o.getPosition(), "1") || Objects.equals(o.getPosition(), "2"))
+                .collect(Collectors.groupingBy(AccountAvgInfo::getGhId,
+                        Collectors.groupingBy(AccountAvgInfo::getUpdateTime,
+                                Collectors.toMap(AccountAvgInfo::getPosition, o -> o))));
 
         String title = "";
         for (PublishSortLog publishSortLog : sortLogList) {
-            Map<String, Article> dateArticleMap = articleMap.get(publishSortLog.getGhId());
+            Map<String, Map<Integer, Article>> dateArticleMap = articleMap.get(publishSortLog.getGhId());
             if (Objects.isNull(dateArticleMap)) {
                 continue;
             }
-            Article article = dateArticleMap.get(publishSortLog.getDateStr());
+            Map<Integer, Article> indexMap = dateArticleMap.get(publishSortLog.getDateStr());
+            if (Objects.isNull(indexMap)) {
+                continue;
+            }
+            Article article = indexMap.get(publishSortLog.getIndex());
             if (Objects.isNull(article) || !publishSortLog.getTitle().equals(article.getTitle())) {
                 continue;
             }
@@ -1408,6 +1418,7 @@ public class DataDashboardService {
             item.setDateStr(publishSortLog.getDateStr());
             item.setGhId(publishSortLog.getGhId());
             item.setAccountName(publishSortLog.getAccountName());
+            item.setIndex(publishSortLog.getIndex());
             item.setTitle(publishSortLog.getTitle());
             item.setStrategy(publishSortLog.getStrategy());
             item.setScore(Double.valueOf(publishSortLog.getScore()));
@@ -1424,15 +1435,18 @@ public class DataDashboardService {
             item.setViewCountRateStrategy(scoreMap.getDoubleValue("ViewCountRateStrategy"));
             item.setHisFissionDeWeightAvgReadSumRateStrategy(scoreMap.getDoubleValue("HisFissionDeWeightAvgReadSumRateStrategy"));
             item.setReadCount(article.getShowViewCount());
-            Map<String, AccountAvgInfo> map = accountAvgInfoMap.get(article.getGhId());
+            Map<String, Map<String, AccountAvgInfo>> map = accountAvgInfoMap.get(article.getGhId());
             if (Objects.nonNull(map)) {
                 List<String> avgMapDateList = new ArrayList<>(map.keySet());
                 String publishDate = DateUtils.findNearestDate(avgMapDateList,
                         DateUtils.timestampToYMDStr(article.getUpdateTime(), "yyyy-MM-dd"), "yyyy-MM-dd");
-                AccountAvgInfo accountAvgInfo = map.get(publishDate);
-                if (Objects.nonNull(accountAvgInfo)) {
-                    item.setReadAvg(accountAvgInfo.getReadAvg());
-                    item.setReadAvgRate(article.getShowViewCount() / (double) accountAvgInfo.getReadAvg());
+                Map<String, AccountAvgInfo> avgIndexMap = map.get(publishDate);
+                if (Objects.nonNull(avgIndexMap)) {
+                    AccountAvgInfo accountAvgInfo = avgIndexMap.get(String.valueOf(publishSortLog.getIndex()));
+                    if (Objects.nonNull(accountAvgInfo)) {
+                        item.setReadAvg(accountAvgInfo.getReadAvg());
+                        item.setReadAvgRate(article.getShowViewCount() / (double) accountAvgInfo.getReadAvg());
+                    }
                 }
             }
             List<PublishContent> hisPublish = hisPublishMap.get(article.getTitle());
@@ -1441,9 +1455,40 @@ public class DataDashboardService {
             item.setFirstExplorationIntervalAvg(explorationInterval);
             result.add(item);
         }
+        saveDatastatScore(dateStrList, result);
+        result = result.stream().filter(o -> o.getIndex() == 1).collect(Collectors.toList());
         result.sort(Comparator.comparing(FirstContentScoreExport::getDateStr).reversed()
                 .thenComparing(FirstContentScoreExport::getGhId));
         return result;
     }
 
+    private void saveDatastatScore(List<String> dateStrList, List<FirstContentScoreExport> result) {
+        // Guard: with no rows to write, the stored rows for these dates are left untouched.
+        if (!CollectionUtils.isNotEmpty(result)) {
+            return;
+        }
+        // Replace semantics: clear the rows for the requested dates, then write the fresh ones.
+        datastatScoreRepository.deleteByDtIn(dateStrList);
+        List<DatastatScore> rows = result.stream().map(export -> {
+            DatastatScore row = new DatastatScore();
+            row.setDt(export.getDateStr());
+            row.setGhId(export.getGhId());
+            row.setAccountName(export.getAccountName());
+            row.setIndex(export.getIndex());
+            row.setTitle(export.getTitle());
+            row.setStrategy(export.getStrategy());
+            row.setScore(export.getScore());
+            row.setSimilarity(export.getSimilarityStrategy());
+            row.setViewCountRate(export.getViewCountRateStrategy());
+            row.setHisFissionAvgReadRateRate(export.getHisFissionAvgReadRateRateStrategy());
+            row.setHisFissionAvgReadSumRate(export.getHisFissionAvgReadSumRateStrategy());
+            row.setHisFissionDeWeightAvgReadSumRate(export.getHisFissionDeWeightAvgReadSumRateStrategy());
+            row.setReadCount(export.getReadCount());
+            row.setReadAvg(export.getReadAvg());
+            row.setReadAvgRate(export.getReadAvgRate());
+            return row;
+        }).collect(Collectors.toList());
+        Lists.partition(rows, 1000).forEach(longArticleBaseMapper::batchInsertDatastatScore); // one INSERT per 1000 rows
+    }
+
 }

+ 12 - 3
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/RecommendService.java

@@ -275,13 +275,20 @@ public class RecommendService {
                 break;
 
         }
+        String dateStr = DateUtils.getCurrentDateStr("yyyyMMdd");
         List<PublishSortLog> publishSortLogSaveList = new ArrayList<>();
         List<AccountAvgInfo> avgInfoList = accountAvgInfoRepository.getAllByGhIdEqualsAndStatusEquals(param.getGhId(), 1);
+        List<PublishSortLog> hisSortLog = publishSortLogRepository.findByDateStrAndGhId(dateStr, param.getGhId());
+        List<String> hisSortTitles = hisSortLog.stream().map(PublishSortLog::getTitle).collect(Collectors.toList());
         for (int i = 1; i < rankResult.getContents().size() + 1; i++) {
             Content content = rankResult.getContents().get(i - 1);
-
+            if (CollectionUtils.isNotEmpty(hisSortLog)) {
+                if (hisSortTitles.contains(content.getTitle())) {
+                    continue;
+                }
+            }
             PublishSortLog sortLog = new PublishSortLog();
-            sortLog.setDateStr(DateUtils.getCurrentDateStr("yyyyMMdd"));
+            sortLog.setDateStr(dateStr);
             sortLog.setGhId(param.getGhId());
             sortLog.setAccountName(param.getAccountName());
             sortLog.setCrawlerChannelContentId(content.getCrawlerChannelContentId());
@@ -297,7 +304,9 @@ public class RecommendService {
             sortLog.setCreateTimestamp(System.currentTimeMillis());
             publishSortLogSaveList.add(sortLog);
         }
-        publishSortLogRepository.saveAll(publishSortLogSaveList);
+        if (CollectionUtils.isNotEmpty(publishSortLogSaveList)) {
+            publishSortLogRepository.saveAll(publishSortLogSaveList);
+        }
     }
 
 }

+ 14 - 0
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -41,4 +41,18 @@
         </foreach>
     </insert>
 
+    <insert id="batchInsertDatastatScore"> <!-- Multi-row insert of DatastatScore items; the value order must match the column list below -->
+        INSERT INTO datastat_score
+        (dt, gh_id, account_name, `index`, title, strategy, score, similarity, view_count_rate,
+         his_fission_avg_read_rate_rate, his_fission_avg_read_sum_rate, his_fission_de_weight_avg_read_sum_rate,
+        read_count, read_avg, read_avg_rate)
+        VALUES
+        <foreach collection="list" item="item" separator=",">
+            (#{item.dt}, #{item.ghId}, #{item.accountName}, #{item.index}, #{item.title}, #{item.strategy},
+             #{item.score}, #{item.similarity}, #{item.viewCountRate}, #{item.hisFissionAvgReadRateRate},
+            #{item.hisFissionAvgReadSumRate}, #{item.hisFissionDeWeightAvgReadSumRate}, #{item.readCount},
+             #{item.readAvg}, #{item.readAvgRate})
+        </foreach>
+    </insert>
+
 </mapper>