فهرست منبع

小程序匹配新老系统数据迁移

wangyunpeng 7 ماه پیش
والد
کامیت
68bde4de9e

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -30,4 +30,8 @@ public interface LongArticleBaseMapper {
     int updateLongArticlesText(LongArticlesText item);
 
     List<LongArticlesRootSourceId> getLongArticlesRootSourceId(List<String> rootSourceIdList);
+
+    List<GetOffVideos> getGetOffVideos(List<Long> videoIds);
+
+    List<LongArticlesMatchVideos> getLongArticlesMatchVideos(List<String> traceIds);
 }

+ 22 - 7
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java

@@ -41,7 +41,14 @@ public class DataFlushService {
         while (pageNum <= totalPage) {
             int offset = (pageNum - 1) * pageSize;
             List<GetOffVideos> list = crawlerBaseMapper.pageGetOffVideos(offset, pageSize);
-            longArticleBaseMapper.batchInsertGetOffVideos(list);
+            List<Long> videoIds = list.stream().map(GetOffVideos::getVideoId)
+                    .distinct().collect(Collectors.toList());
+            List<GetOffVideos> existsList = longArticleBaseMapper.getGetOffVideos(videoIds);
+            Set<Long> existsIds = existsList.stream().map(GetOffVideos::getVideoId).collect(Collectors.toSet());
+            list = list.stream().filter(o -> !existsIds.contains(o.getVideoId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertGetOffVideos(list);
+            }
             log.info("flushGetOffVideos pageNum:{} totalPage:{}", pageNum, totalPage);
             pageNum++;
         }
@@ -58,7 +65,7 @@ public class DataFlushService {
             int offset = (pageNum - 1) * pageSize;
             List<LongArticlesRootSourceId> list = crawlerBaseMapper.pageLongArticlesRootSourceId(offset, pageSize);
             List<String> rootSourceIdList = list.stream().map(LongArticlesRootSourceId::getRootSourceId)
-                   .distinct().collect(Collectors.toList());
+                    .distinct().collect(Collectors.toList());
             List<LongArticlesRootSourceId> existsList = longArticleBaseMapper.getLongArticlesRootSourceId(rootSourceIdList);
             Set<String> existsIds = existsList.stream().map(LongArticlesRootSourceId::getRootSourceId).collect(Collectors.toSet());
             list = list.stream().filter(o -> !existsIds.contains(o.getRootSourceId())).collect(Collectors.toList());
@@ -72,7 +79,7 @@ public class DataFlushService {
 
     public void flushLongArticlesCrawlerVideos(Integer pageNum) {
         List<ArticleMatchVideos> list = crawlerBaseMapper.pageArticleMatchVideos();
-        list = list.stream().filter(o ->StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
+        list = list.stream().filter(o -> StringUtils.hasText(o.getVideoPath())).collect(Collectors.toList());
         Map<String, List<ArticleMatchVideos>> map = list.stream().collect(Collectors.groupingBy(ArticleMatchVideos::getContentId));
         List<LongArticlesCrawlerVideos> batchSaveList = new ArrayList<>();
         for (ArticleMatchVideos video : list) {
@@ -101,7 +108,7 @@ public class DataFlushService {
             saveItem.setUserId(video.getUid());
             saveItem.setTraceId(video.getTraceId());
             saveItem.setDownloadStatus(2);
-            saveItem.setScore(score);
+            saveItem.setScore(score / 1000);
             batchSaveList.add(saveItem);
         }
         if (!CollectionUtils.isEmpty(batchSaveList)) {
@@ -117,7 +124,7 @@ public class DataFlushService {
             pageNum = 1;
         }
         List<LongArticlesText> kimiTitleList = crawlerBaseMapper.getLongArticlesText();
-        Map<String, LongArticlesText> kimiTitleMap =  kimiTitleList.stream().collect(
+        Map<String, LongArticlesText> kimiTitleMap = kimiTitleList.stream().collect(
                 Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
         int count = crawlerBaseMapper.countLongArticlesVideos();
         int totalPage = count / pageSize + 1;
@@ -180,7 +187,15 @@ public class DataFlushService {
                 }
             }
             if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
-                longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
+                List<String> traceIds = batchSaveLongArticlesMatchVideosList.stream().map(LongArticlesMatchVideos::getTraceId)
+                        .distinct().collect(Collectors.toList());
+                List<LongArticlesMatchVideos> existsList = longArticleBaseMapper.getLongArticlesMatchVideos(traceIds);
+                Set<String> existsIds = existsList.stream().map(LongArticlesMatchVideos::getTraceId).collect(Collectors.toSet());
+                batchSaveLongArticlesMatchVideosList = batchSaveLongArticlesMatchVideosList.stream()
+                        .filter(o -> !existsIds.contains(o.getTraceId())).collect(Collectors.toList());
+                if (CollectionUtil.isNotEmpty(batchSaveLongArticlesMatchVideosList)) {
+                    longArticleBaseMapper.batchInsertLongArticlesMatchVideos(batchSaveLongArticlesMatchVideosList);
+                }
             }
             log.info("flushLongArticlesVideos pageNum:{} totalPage:{}", pageNum, totalPage);
             pageNum++;
@@ -244,7 +259,7 @@ public class DataFlushService {
 
     public void flushLongArticlesText() {
         List<LongArticlesText> kimiTitleList = crawlerBaseMapper.getLongArticlesText();
-        Map<String, LongArticlesText> kimiTitleMap =  kimiTitleList.stream().collect(
+        Map<String, LongArticlesText> kimiTitleMap = kimiTitleList.stream().collect(
                 Collectors.toMap(LongArticlesText::getContentId, o -> o, (existing, replacement) -> replacement));
         List<LongArticlesText> updateList = longArticleBaseMapper.getNeedUpdateRecords();
         for (LongArticlesText update : updateList) {

+ 28 - 7
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -5,6 +5,7 @@
     <delete id="deleteByDateStrGreaterThanEqual">
         delete from datastat_sort_strategy where date_str >= #{dateStr}
     </delete>
+
     <select id="countGetOffVideos" resultType="java.lang.Long">
         select count(1) from get_off_videos limit #{offset}, #{pageSize}
     </select>
@@ -38,13 +39,21 @@
         </foreach>
     </insert>
 
+    <select id="getGetOffVideos" resultType="com.tzld.longarticle.recommend.server.model.dto.GetOffVideos">
+        select * from get_off_videos where video_id in
+        <foreach collection="videoIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
     <insert id="batchInsertGetOffVideos">
-        INSERT INTO get_off_videos_copy1 (video_id, publish_time, video_status, trace_id, get_off_time, check_status)
+        INSERT INTO get_off_videos (video_id, publish_time, video_status, trace_id, get_off_time, check_status)
         VALUES
         <foreach collection="list" item="item" separator=",">
             (#{item.videoId}, #{item.publishTime}, #{item.videoStatus}, #{item.traceId}, #{item.getOffTime}, #{item.checkStatus})
         </foreach>
     </insert>
+
     <insert id="batchInsertLongArticlesRootSourceId" parameterType="list">
         INSERT INTO long_articles_root_source_id (root_source_id, account_name, gh_id, article_title, request_time,
         trace_id, push_type, video_id)
@@ -54,8 +63,9 @@
              #{item.traceId}, #{item.pushType}, #{item.videoId})
         </foreach>
     </insert>
+
     <insert id="batchInsertLongArticlesCrawlerVideos" parameterType="list">
-        INSERT INTO long_articles_crawler_videos_copy1 (content_id, out_video_id, platform, video_title, play_count,
+        INSERT INTO long_articles_crawler_videos (content_id, out_video_id, platform, video_title, play_count,
                                                   like_count, share_count, publish_time, crawler_time, duration,
                                                   video_url, cover_url, download_status, video_oss_path, cover_oss_path,
                                                   user_id, trace_id, score)
@@ -67,8 +77,9 @@
              #{item.userId}, #{item.traceId}, #{item.score})
         </foreach>
     </insert>
+
     <insert id="batchInsertLongArticlesText" parameterType="list">
-        INSERT INTO long_articles_text_copy1 (content_id, article_title, article_text, kimi_title, kimi_summary,
+        INSERT INTO long_articles_text (content_id, article_title, article_text, kimi_title, kimi_summary,
                                               kimi_keys, kimi_status)
         VALUES
         <foreach collection="list" item="item" separator=",">
@@ -78,14 +89,15 @@
     </insert>
 
     <select id="getLongArticlesTextByContentIds" resultType="java.lang.String">
-        select content_id from long_articles_text_copy1 where content_id in
+        select content_id from long_articles_text where content_id in
         <foreach collection="contentIds" item="item" open="(" close=")" separator=",">
             #{item}
         </foreach>
     </select>
+
     <select id="getNeedUpdateRecords"
             resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
-        select * from long_articles_text_copy1 where kimi_title is null
+        select * from long_articles_text where kimi_title is null
     </select>
 
     <select id="getLongArticlesRootSourceId"
@@ -97,14 +109,23 @@
     </select>
 
     <update id="updateLongArticlesText">
-        update long_articles_text_copy1
+        update long_articles_text
         set kimi_title = #{kimiTitle},
             kimi_summary = #{kimiSummary},
             kimi_keys = #{kimiKeys}
         where content_id = #{contentId}
     </update>
+
+    <select id="getLongArticlesMatchVideos"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesMatchVideos">
+        select * from long_articles_match_videos where trace_id in
+        <foreach collection="traceIds" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
     <insert id="batchInsertLongArticlesMatchVideos" parameterType="list">
-        INSERT INTO long_articles_match_videos_copy1 (trace_id, content_id, flow_pool_level, gh_id, account_name,
+        INSERT INTO long_articles_match_videos (trace_id, content_id, flow_pool_level, gh_id, account_name,
                                                       content_status, success_status, request_timestamp, response,
                                                       process_times)
         VALUES