|
@@ -1,10 +1,12 @@
|
|
|
package com.tzld.supply.job;
|
|
package com.tzld.supply.job;
|
|
|
|
|
|
|
|
import cn.hutool.core.collection.CollectionUtil;
|
|
import cn.hutool.core.collection.CollectionUtil;
|
|
|
|
|
+import com.alibaba.fastjson.JSONObject;
|
|
|
import com.tzld.supply.api.DangerFaceRecognizeService;
|
|
import com.tzld.supply.api.DangerFaceRecognizeService;
|
|
|
import com.tzld.supply.api.SpiderApiService;
|
|
import com.tzld.supply.api.SpiderApiService;
|
|
|
import com.tzld.supply.common.enums.FindFaceStatusEnum;
|
|
import com.tzld.supply.common.enums.FindFaceStatusEnum;
|
|
|
import com.tzld.supply.common.enums.SpiderContentMediaStatusEnum;
|
|
import com.tzld.supply.common.enums.SpiderContentMediaStatusEnum;
|
|
|
|
|
+import com.tzld.supply.common.enums.SpiderContentMediaTypeEnum;
|
|
|
import com.tzld.supply.common.enums.SpiderContentStatusEnum;
|
|
import com.tzld.supply.common.enums.SpiderContentStatusEnum;
|
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
|
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMediaMapper;
|
|
import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMediaMapper;
|
|
@@ -56,51 +58,57 @@ public class ContentMediaSearchJob {
|
|
|
contentList.add(spiderContentMapper.selectByPrimaryKey(Long.parseLong(param)));
|
|
contentList.add(spiderContentMapper.selectByPrimaryKey(Long.parseLong(param)));
|
|
|
} else {
|
|
} else {
|
|
|
contentList = spiderMapperExt.getMediaSearchSpiderContent(SpiderContentStatusEnum.PASSED.getCode(),
|
|
contentList = spiderMapperExt.getMediaSearchSpiderContent(SpiderContentStatusEnum.PASSED.getCode(),
|
|
|
- startTime, endTime, "image");
|
|
|
|
|
|
|
+ startTime, endTime, SpiderContentMediaTypeEnum.IMAGE.getMsg());
|
|
|
}
|
|
}
|
|
|
if (CollectionUtil.isEmpty(contentList)) {
|
|
if (CollectionUtil.isEmpty(contentList)) {
|
|
|
return ReturnT.SUCCESS;
|
|
return ReturnT.SUCCESS;
|
|
|
}
|
|
}
|
|
|
for (SpiderContent content : contentList) {
|
|
for (SpiderContent content : contentList) {
|
|
|
// 检查是否已经存在图片
|
|
// 检查是否已经存在图片
|
|
|
- Long mediaCount = getSpiderContentMediaCount(content.getId(), "image");
|
|
|
|
|
|
|
+ Long mediaCount = getSpiderContentMediaCount(content.getId(), SpiderContentMediaTypeEnum.IMAGE.getMsg());
|
|
|
if (mediaCount > 0) {
|
|
if (mediaCount > 0) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- List<SpiderBaiduImageItem> dataItems = spiderApiService.searchContentImage(content.getTitle());
|
|
|
|
|
- if (CollectionUtil.isEmpty(dataItems)) {
|
|
|
|
|
|
|
+ imageMediaSearch(content.getId(), content.getTitle());
|
|
|
|
|
+ if (StringUtils.isBlank(content.getKeyword())) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- List<SpiderContentMedia> saveList = new ArrayList<>();
|
|
|
|
|
- Long now = System.currentTimeMillis();
|
|
|
|
|
- for (SpiderBaiduImageItem dataItem : dataItems) {
|
|
|
|
|
- // 转存到OSS
|
|
|
|
|
- String fileName = String.format("supply/spider/image/%s_%d.jpg", content.getId(), System.currentTimeMillis());
|
|
|
|
|
- String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, dataItem.getUrl(), "image/jpeg");
|
|
|
|
|
- if (StringUtils.isBlank(fileUrl)) {
|
|
|
|
|
- log.warn("图片转存OSS失败,URL: {}", dataItem.getUrl());
|
|
|
|
|
- continue;
|
|
|
|
|
- }
|
|
|
|
|
-
|
|
|
|
|
- SpiderContentMedia media = new SpiderContentMedia();
|
|
|
|
|
- media.setContentId(content.getId());
|
|
|
|
|
- media.setMediaType("image");
|
|
|
|
|
- media.setSourceSite("百度图片");
|
|
|
|
|
- media.setUrl(dataItem.getUrl());
|
|
|
|
|
- media.setTitle(dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)));
|
|
|
|
|
- media.setOssKey(fileUrl);
|
|
|
|
|
- media.setStatus(SpiderContentMediaStatusEnum.WAITING.getCode());
|
|
|
|
|
- media.setCreateTime(now);
|
|
|
|
|
- media.setUpdateTime(now);
|
|
|
|
|
- saveList.add(media);
|
|
|
|
|
|
|
+ List<String> keywords = JSONObject.parseArray(content.getKeyword(), String.class);
|
|
|
|
|
+ if (CollectionUtil.isEmpty(keywords)) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ for (String keyword : keywords) {
|
|
|
|
|
+ imageMediaSearch(content.getId(), keyword);
|
|
|
}
|
|
}
|
|
|
- spiderMapperExt.batchInsertSpiderContentMedia(saveList);
|
|
|
|
|
-
|
|
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
return ReturnT.SUCCESS;
|
|
return ReturnT.SUCCESS;
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ private void imageMediaSearch(Long contentId, String keyword) {
|
|
|
|
|
+ List<SpiderBaiduImageItem> dataItems = spiderApiService.searchContentImage(keyword);
|
|
|
|
|
+ if (CollectionUtil.isEmpty(dataItems)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ List<SpiderContentMedia> saveList = new ArrayList<>();
|
|
|
|
|
+ Long now = System.currentTimeMillis();
|
|
|
|
|
+ for (SpiderBaiduImageItem dataItem : dataItems) {
|
|
|
|
|
+ // 转存到OSS
|
|
|
|
|
+ String fileName = String.format("supply/spider/image/%s_%d.jpg", contentId, System.currentTimeMillis());
|
|
|
|
|
+ String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, dataItem.getUrl(), "image/jpeg");
|
|
|
|
|
+ if (StringUtils.isBlank(fileUrl)) {
|
|
|
|
|
+ log.warn("图片转存OSS失败,URL: {}", dataItem.getUrl());
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ SpiderContentMedia media = buildMedia(contentId, SpiderContentMediaTypeEnum.IMAGE.getMsg(),
|
|
|
|
|
+ "百度图片", dataItem.getUrl(), null,
|
|
|
|
|
+ dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)), fileUrl,
|
|
|
|
|
+ SpiderContentMediaStatusEnum.WAITING.getCode(), now);
|
|
|
|
|
+ saveList.add(media);
|
|
|
|
|
+ }
|
|
|
|
|
+ spiderMapperExt.batchInsertSpiderContentMedia(saveList);
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
private Long getSpiderContentMediaCount(Long contentId, String mediaType) {
|
|
private Long getSpiderContentMediaCount(Long contentId, String mediaType) {
|
|
|
SpiderContentMediaExample example = new SpiderContentMediaExample();
|
|
SpiderContentMediaExample example = new SpiderContentMediaExample();
|
|
|
example.createCriteria().andContentIdEqualTo(contentId).andMediaTypeEqualTo(mediaType);
|
|
example.createCriteria().andContentIdEqualTo(contentId).andMediaTypeEqualTo(mediaType);
|
|
@@ -115,7 +123,7 @@ public class ContentMediaSearchJob {
|
|
|
}
|
|
}
|
|
|
Long endTime = startTime + 86400 * 1000;
|
|
Long endTime = startTime + 86400 * 1000;
|
|
|
List<SpiderContentMedia> mediaList = spiderMapperExt.getMediaSearchSpiderContentMedia(SpiderContentStatusEnum.PASSED.getCode(),
|
|
List<SpiderContentMedia> mediaList = spiderMapperExt.getMediaSearchSpiderContentMedia(SpiderContentStatusEnum.PASSED.getCode(),
|
|
|
- startTime, endTime, SpiderContentMediaStatusEnum.WAITING.getCode(), "image");
|
|
|
|
|
|
|
+ startTime, endTime, SpiderContentMediaStatusEnum.WAITING.getCode(), SpiderContentMediaTypeEnum.IMAGE.getMsg());
|
|
|
if (CollectionUtil.isEmpty(mediaList)) {
|
|
if (CollectionUtil.isEmpty(mediaList)) {
|
|
|
return ReturnT.SUCCESS;
|
|
return ReturnT.SUCCESS;
|
|
|
}
|
|
}
|
|
@@ -150,59 +158,71 @@ public class ContentMediaSearchJob {
|
|
|
contentList.add(spiderContentMapper.selectByPrimaryKey(Long.parseLong(param)));
|
|
contentList.add(spiderContentMapper.selectByPrimaryKey(Long.parseLong(param)));
|
|
|
} else {
|
|
} else {
|
|
|
contentList = spiderMapperExt.getMediaSearchSpiderContent(SpiderContentStatusEnum.PASSED.getCode(),
|
|
contentList = spiderMapperExt.getMediaSearchSpiderContent(SpiderContentStatusEnum.PASSED.getCode(),
|
|
|
- startTime, endTime, "video");
|
|
|
|
|
|
|
+ startTime, endTime, SpiderContentMediaTypeEnum.VIDEO.getMsg());
|
|
|
}
|
|
}
|
|
|
if (CollectionUtil.isEmpty(contentList)) {
|
|
if (CollectionUtil.isEmpty(contentList)) {
|
|
|
return ReturnT.SUCCESS;
|
|
return ReturnT.SUCCESS;
|
|
|
}
|
|
}
|
|
|
for (SpiderContent content : contentList) {
|
|
for (SpiderContent content : contentList) {
|
|
|
// 检查是否已经存在视频
|
|
// 检查是否已经存在视频
|
|
|
- Long mediaCount = getSpiderContentMediaCount(content.getId(), "video");
|
|
|
|
|
|
|
+ Long mediaCount = getSpiderContentMediaCount(content.getId(), SpiderContentMediaTypeEnum.VIDEO.getMsg());
|
|
|
if (mediaCount > 0) {
|
|
if (mediaCount > 0) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- List<SpiderHKVideoDetailItem> dataItems = spiderApiService.searchContentVideo(content.getTitle());
|
|
|
|
|
- if (CollectionUtil.isEmpty(dataItems)) {
|
|
|
|
|
|
|
+ videoMediaSearch(content.getId(), content.getTitle());
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ return ReturnT.SUCCESS;
|
|
|
|
|
+ }
|
|
|
|
|
+
|
|
|
|
|
+ private void videoMediaSearch(Long contentId, String keyword) {
|
|
|
|
|
+ List<SpiderHKVideoDetailItem> dataItems = spiderApiService.searchContentVideo(keyword);
|
|
|
|
|
+ if (CollectionUtil.isEmpty(dataItems)) {
|
|
|
|
|
+ return;
|
|
|
|
|
+ }
|
|
|
|
|
+ List<SpiderContentMedia> saveList = new ArrayList<>();
|
|
|
|
|
+ Long now = System.currentTimeMillis();
|
|
|
|
|
+ for (SpiderHKVideoDetailItem dataItem : dataItems) {
|
|
|
|
|
+ if (CollectionUtil.isEmpty(dataItem.getVideoURLList())) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ SpiderHKVideoDetailItem.VideoURLList videoURLList = dataItem.getVideoURLList().get(0);
|
|
|
|
|
+ // 时长过滤
|
|
|
|
|
+ if (videoURLList.getVideoDuration() > 300 || videoURLList.getVideoDuration() < 10) {
|
|
|
continue;
|
|
continue;
|
|
|
}
|
|
}
|
|
|
- List<SpiderContentMedia> saveList = new ArrayList<>();
|
|
|
|
|
- Long now = System.currentTimeMillis();
|
|
|
|
|
- for (SpiderHKVideoDetailItem dataItem : dataItems) {
|
|
|
|
|
- if (CollectionUtil.isEmpty(dataItem.getVideoURLList())) {
|
|
|
|
|
- continue;
|
|
|
|
|
- }
|
|
|
|
|
- SpiderHKVideoDetailItem.VideoURLList videoURLList = dataItem.getVideoURLList().get(0);
|
|
|
|
|
- // 时长过滤
|
|
|
|
|
- if (videoURLList.getVideoDuration() > 300 || videoURLList.getVideoDuration() < 10) {
|
|
|
|
|
- continue;
|
|
|
|
|
- }
|
|
|
|
|
- if (StringUtils.isBlank(videoURLList.getVideoURL())) {
|
|
|
|
|
- continue;
|
|
|
|
|
- }
|
|
|
|
|
- String fileName = String.format("supply/spider/video/%s_%d.mp4", content.getId(), System.currentTimeMillis());
|
|
|
|
|
- String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, videoURLList.getVideoURL(), "video/mp4");
|
|
|
|
|
- if (StringUtils.isBlank(fileUrl)) {
|
|
|
|
|
- log.warn("视频转存OSS失败,URL: {}", videoURLList.getVideoURL());
|
|
|
|
|
- continue;
|
|
|
|
|
- }
|
|
|
|
|
- SpiderContentMedia media = new SpiderContentMedia();
|
|
|
|
|
- media.setContentId(content.getId());
|
|
|
|
|
- media.setMediaType("video");
|
|
|
|
|
- media.setSourceSite("好看视频");
|
|
|
|
|
- media.setUrl(videoURLList.getVideoURL());
|
|
|
|
|
- media.setDuration((int) videoURLList.getVideoDuration() * 1000);
|
|
|
|
|
- media.setTitle(dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)));
|
|
|
|
|
- media.setOssKey(fileUrl);
|
|
|
|
|
- media.setStatus(SpiderContentMediaStatusEnum.PASSED.getCode());
|
|
|
|
|
- media.setCreateTime(now);
|
|
|
|
|
- media.setUpdateTime(now);
|
|
|
|
|
- saveList.add(media);
|
|
|
|
|
|
|
+ if (StringUtils.isBlank(videoURLList.getVideoURL())) {
|
|
|
|
|
+ continue;
|
|
|
|
|
+ }
|
|
|
|
|
+ String fileName = String.format("supply/spider/video/%s_%d.mp4", contentId, System.currentTimeMillis());
|
|
|
|
|
+ String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, videoURLList.getVideoURL(), "video/mp4");
|
|
|
|
|
+ if (StringUtils.isBlank(fileUrl)) {
|
|
|
|
|
+ log.warn("视频转存OSS失败,URL: {}", videoURLList.getVideoURL());
|
|
|
|
|
+ continue;
|
|
|
}
|
|
}
|
|
|
- spiderMapperExt.batchInsertSpiderContentMedia(saveList);
|
|
|
|
|
|
|
+ SpiderContentMedia media = buildMedia(contentId, SpiderContentMediaTypeEnum.VIDEO.getMsg(),
|
|
|
|
|
+ "好看视频", videoURLList.getVideoURL(), (int) videoURLList.getVideoDuration() * 1000,
|
|
|
|
|
+ dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)), fileUrl,
|
|
|
|
|
+ SpiderContentMediaStatusEnum.PASSED.getCode(), now);
|
|
|
|
|
+ saveList.add(media);
|
|
|
}
|
|
}
|
|
|
-
|
|
|
|
|
- return ReturnT.SUCCESS;
|
|
|
|
|
|
|
+ spiderMapperExt.batchInsertSpiderContentMedia(saveList);
|
|
|
}
|
|
}
|
|
|
|
|
|
|
|
|
|
+ private SpiderContentMedia buildMedia(Long contentId, String mediaType, String sourceSite, String url,
|
|
|
|
|
+ Integer duration, String title, String ossKey, Integer status, Long now) {
|
|
|
|
|
+ SpiderContentMedia media = new SpiderContentMedia();
|
|
|
|
|
+ media.setContentId(contentId);
|
|
|
|
|
+ media.setMediaType(mediaType);
|
|
|
|
|
+ media.setSourceSite(sourceSite);
|
|
|
|
|
+ media.setUrl(url);
|
|
|
|
|
+ media.setDuration(duration);
|
|
|
|
|
+ media.setTitle(title);
|
|
|
|
|
+ media.setOssKey(ossKey);
|
|
|
|
|
+ media.setStatus(status);
|
|
|
|
|
+ media.setCreateTime(now);
|
|
|
|
|
+ media.setUpdateTime(now);
|
|
|
|
|
+ return media;
|
|
|
|
|
+ }
|
|
|
|
|
|
|
|
}
|
|
}
|