Jelajahi Sumber

定时处理文章品类

wangyunpeng 8 bulan lalu
induk
melakukan
0bcf962905

+ 4 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/mapper/aigc/AigcBaseMapper.java

@@ -38,5 +38,9 @@ public interface AigcBaseMapper {
 
     CrawlerContent getCrawlerContentByChannelContentId(String channelContentId);
 
+    List<CrawlerContent> getCrawlerContentByChannelContentIdIn(List<String> channelContentIds);
+
     List<ProduceContentDTO> getProduceContentByPlanId(String planId);
+
+    List<ProducePlanExeRecord> getAllByProducePlanId(List<String> producePlanIds);
 }

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/model/dto/CrawlerContent.java

@@ -5,6 +5,7 @@ import lombok.Data;
 @Data
 public class CrawlerContent {
     private String channelContentId;
+    private String crawlerPlanId;
     private String ghId;
     private String title;
     private Long publishTimestamp;

+ 1 - 0
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/repository/longArticle/ArticleCategoryRepository.java

@@ -13,4 +13,5 @@ public interface ArticleCategoryRepository extends JpaRepository<ArticleCategory
 
     List<ArticleCategory> getByStatus(Integer status);
 
+    List<ArticleCategory> getAllByChannelContentIdIn(List<String> channelContentIds);
 }

+ 35 - 23
long-article-recommend-service/src/main/java/com/tzld/longarticle/recommend/server/service/recommend/ArticleService.java

@@ -325,6 +325,7 @@ public class ArticleService {
         }
     }
 
+
     public void articleCategory() {
         List<ArticleCrawlerPlan> articleCrawlerPlanList = articleCrawlerPlanRepository.getByStatus(StatusEnum.ZERO.getCode());
         List<String> producePlanIds = Arrays.asList("20240802021606053813696", "20240802080355355308981",
@@ -347,24 +348,32 @@ public class ArticleService {
                 item.setCreateTimestamp(now);
                 saveList.add(item);
             }
-            longArticleBaseMapper.batchInsertArticleCategory(saveList);
+            if (CollectionUtils.isNotEmpty(saveList)) {
+                longArticleBaseMapper.batchInsertArticleCategory(saveList);
+            }
             // 抓取计划超过5天设置为已处理
-            String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 6);
-            if (DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd") < now - 86400000 * 5) {
+            String dateStr = crawlerPlan.getCrawlerPlanId().substring(0, 8);
+            if (DateUtils.dateStrToTimestamp(dateStr, "yyyyMMdd") < now - 86400 * 5) {
                 crawlerPlan.setStatus(StatusEnum.ONE.getCode());
                 crawlerPlan.setUpdateTimestamp(now);
                 articleCrawlerPlanRepository.save(crawlerPlan);
             }
         }
         List<ArticleCategory> dealList = articleCategoryRepository.getByStatus(ArticleCategoryStatusEnum.WAITING.getCode());
-        for (List<ArticleCategory> partition : Lists.partition(dealList, 20)) {
-            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).collect(Collectors.toList());
+        List<List<ArticleCategory>> partitionList = Lists.partition(dealList, 20);
+        for (List<ArticleCategory> partition : partitionList) {
+            List<String> partitionTitles = partition.stream().map(ArticleCategory::getTitle).distinct().collect(Collectors.toList());
             String prompt = buildKimiPrompt(partitionTitles);
             KimiResult kimiResult = kimiApiService.requestOfficialApi(prompt, null, null);
             long now = System.currentTimeMillis();
             JSONObject obj = null;
             if (kimiResult.isSuccess()) {
-                obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
+                try {
+                    obj = JSONObject.parseObject(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
+                } catch (Exception e) {
+                    log.error(kimiResult.getResponse().getChoices().get(0).getMessage().getContent());
+                    continue;
+                }
             }
             for (ArticleCategory articleCategory : partition) {
                 articleCategory.setKimiResult(kimiResult.getResponseStr());
@@ -395,27 +404,30 @@ public class ArticleService {
                 "    政治新闻\n" +
                 "    军事新闻\n" +
                 "    为了更好地完成任务,可参考下列对文章标题的分类:\n" +
-                "    大舅病了,我取了三万元送过去,病房门口听到舅妈的话我改了主意\\t家长里短\n" +
-                "    能活到90岁的老人,基本上在70岁的时候,就不再做这些事了!\\t健康养生\n" +
-                "    去医院看望病人时,切忌带这4样东西,再亲近也不行,这是做人的根本\\t生活知识\n" +
-                "    上海一女子去饭店吃生煎包,戳了个洞想凉一凉,往里一看,瞬间惊呆了\\t奇闻趣事\n" +
-                "    卫生间放一把食盐,一年能省下好几百,涨新知识\\t生活知识\n" +
-                "    中国有一古寺,庙不大,却有武警24小时站岗,到底有何“过人”之处\\t奇闻趣事\n" +
-                "    1974年,苏联外长故意拿邓小平身高“取笑”,邓小平一句话轻松反击\\t历史人物\n" +
-                "    中国最美的女将军:上世纪曾家喻户晓,如今仍然健在\\t历史人物\n" +
-                "    北大才女蒙曼48岁仍未婚,被问最想嫁给谁,一个名字让全场笑喷\\t名人八卦\n" +
-                "    广东一老人去世,家人把老人的旧床垫扔了,环卫工人看到后,竟发现里面藏了15万元现金!家人傻眼了\\t奇闻趣事\n" +
-                "    你返回的结果为每行一个文章标题和它对应的类目,以制表符分隔,具体格式如下:\n" +
-                "    文章标题\\t类目\n" +
+                "    {" +
+                "    \"大舅病了,我取了三万元送过去,病房门口听到舅妈的话我改了主意\": \"家长里短\",\n" +
+                "    \"能活到90岁的老人,基本上在70岁的时候,就不再做这些事了!\": \"健康养生\",\n" +
+                "    \"去医院看望病人时,切忌带这4样东西,再亲近也不行,这是做人的根本\": \"生活知识\",\n" +
+                "    \"上海一女子去饭店吃生煎包,戳了个洞想凉一凉,往里一看,瞬间惊呆了\": \"奇闻趣事\",\n" +
+                "    \"卫生间放一把食盐,一年能省下好几百,涨新知识\": \"生活知识\",\n" +
+                "    \"中国有一古寺,庙不大,却有武警24小时站岗,到底有何“过人”之处\": \"奇闻趣事\",\n" +
+                "    \"1974年,苏联外长故意拿邓小平身高“取笑”,邓小平一句话轻松反击\": \"历史人物\",\n" +
+                "    \"中国最美的女将军:上世纪曾家喻户晓,如今仍然健在\": \"历史人物\",\n" +
+                "    \"北大才女蒙曼48岁仍未婚,被问最想嫁给谁,一个名字让全场笑喷\": \"名人八卦\",\n" +
+                "    \"广东一老人去世,家人把老人的旧床垫扔了,环卫工人看到后,竟发现里面藏了15万元现金!家人傻眼了\": \"奇闻趣事\",\n" +
+                "    }" +
+                "    最后输出结果请用JSON格式输出,key为title,value为类目,仅输出JSON,不要markdown格式,不要任何其他内容," +
+                "    并且内容可以被 fastJSON 的JSONObject.parseObject转换为JSON对象\n" +
+                "    当标题的开头或结尾为以下字符时“”“,则在标题的开头或结尾增加\" " +
+                "    输出结果格式如下:\n" +
+                "    {" +
+                "        \"浙江老人用“假钱”吃霸王餐9年,离世后,老板却崩溃大哭:“每天都在等他!”\": \"奇闻趣事\"," +
+                "        \"“最美婴儿”迅速走红,像在娘胎里整过容,网友:看到第一眼就想抱回家\": \"奇闻趣事\"" +
+                "    }" +
                 "    以下是需要分析的文章标题列表,每一行是一个标题:\n");
         for (String title : titleList) {
             prompt.append(title).append("\n");
         }
-        prompt.append("最后输出结果请用JSON格式输出,key为title,value为类目,仅输出JSON,不要任何其他内容 格式如下:\n");
-        prompt.append("{" +
-                "\"大舅病了,我取了三万元送过去,病房门口听到舅妈的话我改了主意\": \"家长里短\",\n" +
-                "\"能活到90岁的老人,基本上在70岁的时候,就不再做这些事了!\": \"健康养生\"\n" +
-                "}");
         return prompt.toString();
     }
 }

+ 22 - 0
long-article-recommend-service/src/main/resources/mapper/aigc/AigcBaseMapper.xml

@@ -176,6 +176,28 @@
           on record.plan_exe_id = output.plan_exe_id and output.produce_module_type = 3
         where record.plan_id = #{planId} and record.status = 2 and audit_status = 1
     </select>
+    <!--
+        Returns the produce_plan_exe_record rows with status = 2 whose plan_id is one of
+        producePlanIds. A null or empty list yields no rows instead of the invalid SQL
+        "plan_id in ()".
+    -->
+    <select id="getAllByProducePlanId"
+            resultType="com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord">
+        select *
+        from produce_plan_exe_record
+        where status = 2
+        <choose>
+            <when test="producePlanIds != null and producePlanIds.size() > 0">
+                and plan_id in
+                <foreach collection="producePlanIds" item="item" open="(" close=")" separator=",">
+                    #{item}
+                </foreach>
+            </when>
+            <otherwise>
+                <!-- Nothing to look up: force an empty result set. -->
+                and 1 = 0
+            </otherwise>
+        </choose>
+    </select>
+    <!--
+        Batch lookup of crawler content by channel_content_id. Each row is joined with the
+        greatest plan_id recorded for it in crawler_plan_result_rel (exposed as crawlerPlanId)
+        and with the owning crawler_account (wx_gh exposed as ghId). A null or empty id list
+        yields no rows instead of the invalid SQL "channel_content_id in ()".
+    -->
+    <select id="getCrawlerContentByChannelContentIdIn"
+            resultType="com.tzld.longarticle.recommend.server.model.dto.CrawlerContent">
+        select cc.channel_content_id, cprr.plan_id as crawlerPlanId, ca.wx_gh as ghId, cc.title, cc.publish_timestamp
+        from crawler_content cc
+        join (select channel_source_id, max(plan_id) as plan_id from crawler_plan_result_rel group by channel_source_id) cprr
+            on cc.channel_content_id = cprr.channel_source_id
+        join crawler_account ca on cc.channel_account_id = ca.channel_account_id
+        <where>
+            <choose>
+                <when test="channelContentIds != null and channelContentIds.size() > 0">
+                    cc.channel_content_id in
+                    <foreach collection="channelContentIds" item="item" open="(" close=")" separator=",">
+                        #{item}
+                    </foreach>
+                </when>
+                <otherwise>
+                    <!-- Nothing to look up: force an empty result set. -->
+                    1 = 0
+                </otherwise>
+            </choose>
+        </where>
+    </select>
 
 
 </mapper>

+ 46 - 3
long-article-recommend-service/src/test/java/com/tzld/longarticle/recommend/server/XxlJobTest.java

@@ -2,8 +2,13 @@ package com.tzld.longarticle.recommend.server;
 
 import com.tzld.longarticle.recommend.server.mapper.aigc.AigcBaseMapper;
 import com.tzld.longarticle.recommend.server.mapper.longArticle.LongArticleBaseMapper;
+import com.tzld.longarticle.recommend.server.model.dto.CrawlerContent;
 import com.tzld.longarticle.recommend.server.model.dto.ProduceContentDTO;
+import com.tzld.longarticle.recommend.server.model.entity.aigc.ProducePlanExeRecord;
+import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticleCategory;
 import com.tzld.longarticle.recommend.server.model.entity.longArticle.ArticlePoolPromotionSource;
+import com.tzld.longarticle.recommend.server.model.vo.ProduceContentCrawlerVO;
+import com.tzld.longarticle.recommend.server.repository.longArticle.ArticleCategoryRepository;
 import com.tzld.longarticle.recommend.server.repository.longArticle.ArticlePoolPromotionSourceRepository;
 import com.tzld.longarticle.recommend.server.service.recommend.ArticleService;
 import com.tzld.longarticle.recommend.server.util.Md5Util;
@@ -11,9 +16,8 @@ import org.junit.jupiter.api.Test;
 import org.springframework.boot.test.context.SpringBootTest;
 
 import javax.annotation.Resource;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.List;
+import java.util.*;
+import java.util.function.Function;
 import java.util.stream.Collectors;
 
 @SpringBootTest(classes = Application.class)
@@ -26,6 +30,8 @@ public class XxlJobTest {
     private LongArticleBaseMapper longArticleBaseMapper;
     @Resource
     private ArticlePoolPromotionSourceRepository articlePoolPromotionSourceRepository;
+    @Resource
+    private ArticleCategoryRepository articleCategoryRepository;
 
     @Test
     public void test() {
@@ -49,4 +55,41 @@ public class XxlJobTest {
             articlePoolPromotionSourceRepository.saveAll(saveList);
         }
     }
+
+    /**
+     * Backfills {@code ArticleCategory} rows for a fixed list of produce plans.
+     *
+     * <p>Loads the execution records of those plans, drops the channel content ids that
+     * already have an {@code ArticleCategory}, looks up the crawler content (title and
+     * crawler plan id) for the remaining ids and batch-inserts one new row per execution
+     * record whose crawler content was found.
+     */
+    @Test
+    public void articleCategoryTest() {
+        List<String> producePlanIds = Arrays.asList("20240802021606053813696", "20240802080355355308981",
+                "20240805154433785506170", "20240805154359027876170", "20241024100016206421084", "20241030070010871546586");
+        List<ProducePlanExeRecord> produceContentList = aigcBaseMapper.getAllByProducePlanId(producePlanIds);
+        List<String> channelContentIds = produceContentList.stream()
+                .map(ProducePlanExeRecord::getChannelContentId)
+                .distinct()
+                .collect(Collectors.toList());
+        if (channelContentIds.isEmpty()) {
+            // No execution records: avoid issuing IN-queries with an empty id list.
+            return;
+        }
+
+        // A Set keeps the membership checks below O(1) instead of scanning a List per element.
+        Set<String> categorizedIds = articleCategoryRepository.getAllByChannelContentIdIn(channelContentIds).stream()
+                .map(ArticleCategory::getChannelContentId)
+                .collect(Collectors.toSet());
+        List<String> pendingIds = channelContentIds.stream()
+                .filter(id -> !categorizedIds.contains(id))
+                .collect(Collectors.toList());
+        if (pendingIds.isEmpty()) {
+            // Every channel content id already has a category row.
+            return;
+        }
+
+        // Keep the first row per id so a duplicated join result cannot abort the run
+        // with an IllegalStateException from toMap.
+        Map<String, CrawlerContent> crawlerContentById = aigcBaseMapper.getCrawlerContentByChannelContentIdIn(pendingIds).stream()
+                .collect(Collectors.toMap(CrawlerContent::getChannelContentId, Function.identity(),
+                        (first, second) -> first));
+
+        long now = System.currentTimeMillis();
+        List<ArticleCategory> saveList = new ArrayList<>();
+        for (ProducePlanExeRecord exeRecord : produceContentList) {
+            // The map only holds pending ids, so a hit also means "not categorized yet".
+            CrawlerContent crawlerContent = crawlerContentById.get(exeRecord.getChannelContentId());
+            if (Objects.isNull(crawlerContent)) {
+                continue;
+            }
+            String title = crawlerContent.getTitle();
+            ArticleCategory item = new ArticleCategory();
+            item.setChannelContentId(exeRecord.getChannelContentId());
+            item.setProduceContentId(exeRecord.getPlanExeId());
+            item.setCrawlerPlanId(crawlerContent.getCrawlerPlanId());
+            item.setTitle(title);
+            item.setTitleMd5(Md5Util.encoderByMd5(title));
+            item.setCreateTimestamp(now);
+            saveList.add(item);
+        }
+        // Same guard as ArticleService.articleCategory: never batch-insert an empty list.
+        if (!saveList.isEmpty()) {
+            longArticleBaseMapper.batchInsertArticleCategory(saveList);
+        }
+    }
 }