浏览代码

抓取热榜 过滤来源

wangyunpeng 2 周之前
父节点
当前提交
b0fe4043ae
共有 1 个文件被更改,包括 7 次插入和 0 次删除
  1. 7 0
      core/src/main/java/com/tzld/supply/job/SpiderJob.java

+ 7 - 0
core/src/main/java/com/tzld/supply/job/SpiderJob.java

@@ -1,5 +1,6 @@
 package com.tzld.supply.job;
 
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
 import com.tzld.supply.api.SpiderApiService;
 import com.tzld.supply.common.enums.SpiderContentStatusEnum;
 import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
@@ -37,6 +38,8 @@ public class SpiderJob {
 
     @Value("${get.rank.page.size:5}")
     private Integer getRankPageSize;
+    @ApolloJsonValue("${spider.filter.source.list:[\"知乎\",\"知乎日报\",\"果壳\",\"壹心理\",\"China Daily\",\"NASA \uD83C\uDF0D\",\"wikiHow 中文\"]}")
+    private List<String> filterSourceList;
 
     @XxlJob("spiderTaskJob")
     public ReturnT<String> spiderTaskJob(String param) {
@@ -74,6 +77,10 @@ public class SpiderJob {
                         || rankListItem.getTitle().matches("^[A-Za-z0-9\\s!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~’‘“”]+$")) {
                     continue;
                 }
+                // Filter 过滤来源
+                if (CollectionUtils.isNotEmpty(filterSourceList) && filterSourceList.contains(dataItem.getSource())) {
+                    continue;
+                }
                 // Filter 过滤已存在
                 if (checkContentExist(rankListItem) || titles.contains(rankListItem.getTitle())) {
                     continue;