Ver Fonte

数据迁移

wangyunpeng há 7 meses atrás
pai
commit
224df258c0

+ 2 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/longArticle/LongArticleBaseMapper.java

@@ -28,4 +28,6 @@ public interface LongArticleBaseMapper {
     List<LongArticlesText> getNeedUpdateRecords();
 
     int updateLongArticlesText(LongArticlesText item);
+
+    List<LongArticlesRootSourceId> getLongArticlesRootSourceId(List<String> rootSourceIdList);
 }

+ 9 - 2
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/DataFlushService.java

@@ -57,8 +57,15 @@ public class DataFlushService {
         while (pageNum <= totalPage) {
             int offset = (pageNum - 1) * pageSize;
             List<LongArticlesRootSourceId> list = crawlerBaseMapper.pageLongArticlesRootSourceId(offset, pageSize);
-            longArticleBaseMapper.batchInsertLongArticlesRootSourceId(list);
-            log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{}", pageNum, totalPage);
+            List<String> rootSourceIdList = list.stream().map(LongArticlesRootSourceId::getRootSourceId)
+                   .distinct().collect(Collectors.toList());
+            List<LongArticlesRootSourceId> existsList = longArticleBaseMapper.getLongArticlesRootSourceId(rootSourceIdList);
+            Set<String> existsIds = existsList.stream().map(LongArticlesRootSourceId::getRootSourceId).collect(Collectors.toSet());
+            list = list.stream().filter(o -> !existsIds.contains(o.getRootSourceId())).collect(Collectors.toList());
+            if (CollectionUtil.isNotEmpty(list)) {
+                longArticleBaseMapper.batchInsertLongArticlesRootSourceId(list);
+            }
+            log.info("flushLongArticlesRootSourceId pageNum:{} totalPage:{} existsSize: {}", pageNum, totalPage, existsList.size());
             pageNum++;
         }
     }

+ 10 - 1
long-article-recommend-service/src/main/resources/mapper/longArticle/LongArticleBaseMapper.xml

@@ -46,7 +46,7 @@
         </foreach>
     </insert>
     <insert id="batchInsertLongArticlesRootSourceId" parameterType="list">
-        INSERT INTO long_articles_root_source_id_copy1 (root_source_id, account_name, gh_id, article_title, request_time,
+        INSERT INTO long_articles_root_source_id (root_source_id, account_name, gh_id, article_title, request_time,
         trace_id, push_type, video_id)
         VALUES
         <foreach collection="list" item="item" index="index" separator=",">
@@ -87,6 +87,15 @@
             resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesText">
         select * from long_articles_text_copy1 where kimi_title is null
     </select>
+
+    <select id="getLongArticlesRootSourceId"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.LongArticlesRootSourceId">
+        select * from long_articles_root_source_id where root_source_id in
+        <foreach collection="rootSourceIdList" item="item" open="(" close=")" separator=",">
+            #{item}
+        </foreach>
+    </select>
+
     <update id="updateLongArticlesText">
         update long_articles_text_copy1
         set kimi_title = #{kimiTitle},