|
@@ -1,5 +1,6 @@
|
|
|
package com.tzld.supply.job;
|
|
package com.tzld.supply.job;
|
|
|
|
|
|
|
|
|
|
+import com.ctrip.framework.apollo.spring.annotation.ApolloJsonValue;
|
|
|
import com.tzld.supply.api.SpiderApiService;
|
|
import com.tzld.supply.api.SpiderApiService;
|
|
|
import com.tzld.supply.common.enums.SpiderContentStatusEnum;
|
|
import com.tzld.supply.common.enums.SpiderContentStatusEnum;
|
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
|
|
@@ -37,6 +38,8 @@ public class SpiderJob {
|
|
|
|
|
|
|
|
@Value("${get.rank.page.size:5}")
|
|
@Value("${get.rank.page.size:5}")
|
|
|
private Integer getRankPageSize;
|
|
private Integer getRankPageSize;
|
|
|
|
|
+ @ApolloJsonValue("${spider.filter.source.list:[\"知乎\",\"知乎日报\",\"果壳\",\"壹心理\",\"China Daily\",\"NASA \uD83C\uDF0D\",\"wikiHow 中文\"]}")
|
|
|
|
|
+ private List<String> filterSourceList;
|
|
|
|
|
|
|
|
@XxlJob("spiderTaskJob")
|
|
@XxlJob("spiderTaskJob")
|
|
|
public ReturnT<String> spiderTaskJob(String param) {
|
|
public ReturnT<String> spiderTaskJob(String param) {
|
|
@@ -74,6 +77,10 @@ public class SpiderJob {
|
|
|
|| rankListItem.getTitle().matches("^[A-Za-z0-9\\s!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~’‘“”]+$")) {
|
|
|| rankListItem.getTitle().matches("^[A-Za-z0-9\\s!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~’‘“”]+$")) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
|
|
+ // Filter: skip entries whose source is in the configured filterSourceList
|
|
|
|
|
+ if (CollectionUtils.isNotEmpty(filterSourceList) && filterSourceList.contains(dataItem.getSource())) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
// Filter 过滤已存在
|
|
// Filter 过滤已存在
|
|
|
if (checkContentExist(rankListItem) || titles.contains(rankListItem.getTitle())) {
|
|
if (checkContentExist(rankListItem) || titles.contains(rankListItem.getTitle())) {
|
|
|
continue;
|
|
continue;
|