فهرست منبع

long_articles_crawler_videos 增加score

wangyunpeng 9 ماه پیش
والد
کامیت
0be564ce33

+ 1 - 1
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/crawler/CrawlerBaseMapper.java

@@ -22,7 +22,7 @@ public interface CrawlerBaseMapper {
 
     Integer countArticleMatchVideos();
 
-    List<ArticleMatchVideos> pageArticleMatchVideos(int offset, int pageSize);
+    List<ArticleMatchVideos> pageArticleMatchVideos();
 
     Integer countLongArticlesVideos();
 

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/LongArticlesCrawlerVideos.java

@@ -24,4 +24,5 @@ public class LongArticlesCrawlerVideos {
     private String coverOssPath;
     private String userId;
     private String traceId;
+    private Double score;
 }

+ 35 - 28
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java

@@ -3,6 +3,7 @@ package com.tzld.longarticle.recommend.server.service;
 import cn.hutool.core.collection.CollectionUtil;
 import com.alibaba.fastjson.JSONArray;
 import com.alibaba.fastjson.JSONObject;
+import com.google.common.collect.Lists;
 import com.tzld.longarticle.recommend.server.mapper.crawler.CrawlerBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
 import com.tzld.longarticle.recommend.server.model.dto.*;
@@ -59,37 +60,43 @@ public class DataFlushService {
     }
 
     public void flushLongArticlesCrawlerVideos(Integer pageNum) {
-        int pageSize = 1000;
-        if (pageNum == null) {
-            pageNum = 1;
-        }
-        int count = crawlerBaseMapper.countArticleMatchVideos();
-        int totalPage = count / pageSize + 1;
-        while (pageNum <= totalPage) {
-            int offset = (pageNum - 1) * pageSize;
-            List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos(offset, pageSize);
-            List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
-            for (ArticleMatchVideos video : list) {
-                if (!StringUtils.hasText(video.getVideoPath())) {
-                    continue;
+        // Loads every matched video that has a video path, scores it, and inserts the results in batches of 1000.
+        // pageNum is no longer read (paging was removed); it stays only so the signature is unchanged.
+        List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos();
+        list = list.stream().filter(o -> StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
+        // Sort each content id's update times once (not per video). NOTE(review): null ids/times would throw here.
+        Map<String, List<Date>> orderDateMap = list.stream().collect(Collectors.groupingBy(ArticleMatchVideos::getContentId,
+                Collectors.mapping(ArticleMatchVideos::getUpdateTime, Collectors.toCollection(ArrayList::new))));
+        orderDateMap.values().forEach(dates -> dates.sort(null));
+        List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
+        for (ArticleMatchVideos video : list) {
+            List<Date> orderDate = orderDateMap.get(video.getContentId());
+            double score = 0.2;
+            // Earliest update time scores 1, second 0.5, the rest 0.2. Slot 1 is tested first so a time that
+            // fills both slots (a tie) still ends up at 0.5, exactly as the previous ascending scan did.
+            for (int i = Math.min(orderDate.size(), 2) - 1; i >= 0; i--) {
+                if (orderDate.get(i).equals(video.getUpdateTime())) {
+                    score = i == 0 ? 1 : 0.5;
+                    break;
                 }
-                LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
-                saveItem.setContentId(video.getContentId());
-                saveItem.setPlatform(video.getPlatform());
-                saveItem.setVideoTitle(video.getVideoTitle());
-                saveItem.setCrawlerTime(video.getUpdateTime());
-                saveItem.setVideoOssPath(video.getVideoPath());
-                saveItem.setCoverOssPath(video.getCoverPath());
-                saveItem.setUserId(video.getUid());
-                saveItem.setTraceId(video.getTraceId());
-                saveItem.setDownloadStatus(2);
-                batchSaveList.add(saveItem);
             }
-            if (!CollectionUtils.isEmpty(batchSaveList)) {
-                longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(batchSaveList);
+            LongArticlesCrawlerVideos saveItem = new LongArticlesCrawlerVideos();
+            saveItem.setContentId(video.getContentId());
+            saveItem.setPlatform(video.getPlatform());
+            saveItem.setVideoTitle(video.getVideoTitle());
+            saveItem.setCrawlerTime(video.getUpdateTime());
+            saveItem.setVideoOssPath(video.getVideoPath());
+            saveItem.setCoverOssPath(video.getCoverPath());
+            saveItem.setUserId(video.getUid());
+            saveItem.setTraceId(video.getTraceId());
+            saveItem.setDownloadStatus(2);
+            saveItem.setScore(score);
+            batchSaveList.add(saveItem);
+        }
+        if (!CollectionUtils.isEmpty(batchSaveList)) {
+            for (List<LongArticlesCrawlerVideos> partition : Lists.partition(batchSaveList, 1000)) {
+                longArticleBaseMapper.batchInsertLongArticlesCrawlerVideos(partition);
             }
-            log.info("flushLongArticlesCrawlerVideos pageNum:{} totalPage:{}", pageNum, totalPage);
-            pageNum++;
         }
     }
 

+ 2 - 2
long-article-recommend-service/src/main/resources/mapper/crawler/CrawlerBaseMapper.xml

@@ -22,11 +22,11 @@
         select * from long_articles_root_source_id order by rootSourceId limit #{offset}, #{pageSize}
     </select>
     <select id="countArticleMatchVideos" resultType="java.lang.Integer">
-        select count(1) from article_match_videos where DATE(update_time) &gt;= '2024-09-20'
+        select count(1) from article_match_videos where update_time &lt; '2024-09-20' <!-- range on the bare column so an index on update_time stays usable; assumes update_time is DATE/DATETIME/TIMESTAMP (TODO confirm), where this matches the DATE(update_time) form -->
     </select>
     <select id="pageArticleMatchVideos"
             resultType="com.tzld.longarticle.recommend.server.model.dto.ArticleMatchVideos">
-        select * from article_match_videos where DATE(update_time) &gt;= '2024-09-20' and success = 1 and content_status = 2 order by video_id limit #{offset}, #{pageSize}
+        select * from article_match_videos where update_time &lt; '2024-09-20' order by video_id <!-- range on the bare column so an index on update_time stays usable; assumes update_time is DATE/DATETIME/TIMESTAMP (TODO confirm), where this matches the DATE(update_time) form. NOTE(review): no LIMIT, the caller loads the full result into memory -->
     </select>
     <select id="countLongArticlesVideos" resultType="java.lang.Integer">
         select count(1) from long_articles_video where DATE(update_time) &gt;= '2024-09-20' and success = 1 and content_status = 2

+ 2 - 2
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -58,13 +58,13 @@
         INSERT INTO long_articles_crawler_videos_copy1 (content_id, out_video_id, platform, video_title, play_count,
                                                   like_count, share_count, publish_time, crawler_time, duration,
                                                   video_url, cover_url, download_status, video_oss_path, cover_oss_path,
-                                                  user_id, trace_id)
+                                                  user_id, trace_id, score)
         VALUES
         <foreach collection="list" item="item" separator=",">
             (#{item.contentId}, #{item.outVideoId}, #{item.platform}, #{item.videoTitle}, #{item.playCount},
              #{item.likeCount}, #{item.shareCount}, #{item.publishTime}, #{item.crawlerTime}, #{item.duration},
              #{item.videoUrl}, #{item.coverUrl}, #{item.downloadStatus}, #{item.videoOssPath}, #{item.coverOssPath},
-             #{item.userId}, #{item.traceId})
+             #{item.userId}, #{item.traceId}, #{item.score})
         </foreach>
     </insert>
     <insert id="batchInsertLongArticlesText" parameterType="list">