Jelajahi Sumber

媒体资源搜索

wangyunpeng 3 hari lalu
induk
komit
c85a7f7cbc

+ 44 - 11
core/src/main/java/com/tzld/supply/api/SpiderApiService.java

@@ -1,5 +1,6 @@
 package com.tzld.supply.api;
 
+import cn.hutool.core.collection.CollectionUtil;
 import com.alibaba.fastjson.JSONObject;
 import com.alibaba.fastjson.TypeReference;
 import com.tzld.supply.model.entity.*;
@@ -13,6 +14,7 @@ import javax.annotation.PostConstruct;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.Objects;
 import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
 
@@ -54,9 +56,10 @@ public class SpiderApiService {
 
     /**
      * 重试执行方法
-     * @param callable 业务逻辑回调(返回ResultWrapper,包含状态码和数据)
+     *
+     * @param callable   业务逻辑回调(返回ResultWrapper,包含状态码和数据)
      * @param maxRetries 最大重试次数
-     * @param delayMs 重试延迟(毫秒)
+     * @param delayMs    重试延迟(毫秒)
      * @return 业务数据(T)
      */
     private <T> T executeWithRetry(Callable<ResultWrapper<T>> callable, int maxRetries, long delayMs) {
@@ -90,6 +93,7 @@ public class SpiderApiService {
 
     /**
      * 查询内容热榜请求
+     *
      * @param sortType 排序类型
      * @param category 分类
      * @param cursor   分页游标
@@ -123,6 +127,7 @@ public class SpiderApiService {
 
     /**
      * 搜索内容详情请求
+     *
      * @param title 标题
      * @return ContentSearchResponse.DataItem 响应对象
      */
@@ -165,10 +170,11 @@ public class SpiderApiService {
 
     /**
      * 搜索内容图片请求 baidu
+     *
      * @param keyword 搜索关键词
-     * @return ContentSearchResponse.DataItem 响应对象
+     * @return List<SpiderMediaItem> 响应对象
      */
-    public List<SpiderBaiduImageItem> searchContentImage(String keyword) {
+    public List<SpiderMediaItem> searchContentImage(String keyword) {
         // 使用通用重试方法执行,最多重试3次,延迟2000ms
         return executeWithRetry(() -> {
             // 1. 构造请求
@@ -196,9 +202,19 @@ public class SpiderApiService {
 
                 // 3. 解析数据并返回成功结果
                 SpiderContentSearchResponse<SpiderBaiduImageItem> contentSearchResponse =
-                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderBaiduImageItem>>() {});
+                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderBaiduImageItem>>() {
+                        });
                 List<SpiderBaiduImageItem> data = contentSearchResponse != null ? contentSearchResponse.getData() : null;
-                return new ResultWrapper<>(0, data); // 状态码0表示成功
+                List<SpiderMediaItem> result = new ArrayList<>();
+                if (CollectionUtil.isNotEmpty(data)) {
+                    for (SpiderBaiduImageItem item : data) {
+                        SpiderMediaItem mediaItem = new SpiderMediaItem();
+                        mediaItem.setTitle(item.getTitle());
+                        mediaItem.setUrl(item.getUrl());
+                        result.add(mediaItem);
+                    }
+                }
+                return new ResultWrapper<>(0, result); // 状态码0表示成功
             } catch (IOException e) {
                 log.error("搜索内容图片请求异常", e);
                 throw e; // 抛出异常,由通用重试方法捕获并重试
@@ -208,10 +224,11 @@ public class SpiderApiService {
 
     /**
      * 搜索内容视频请求
+     *
      * @param keyword 搜索关键词
-     * @return List<SpiderHKVideoDetailItem> 响应对象
+     * @return List<SpiderMediaItem> 响应对象
      */
-    public List<SpiderHKVideoDetailItem> searchContentVideo(String keyword) {
+    public List<SpiderMediaItem> searchContentVideo(String keyword) {
         return executeWithRetry(() -> {
             String url = baseUrl + "/crawler/hao_kan_shi_pin/keyword";
             JSONObject param = new JSONObject();
@@ -235,7 +252,8 @@ public class SpiderApiService {
                 }
 
                 SpiderContentSearchResponse<SpiderHKVideoSearchItem> contentSearchResponse =
-                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoSearchItem>>() {});
+                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoSearchItem>>() {
+                        });
                 if (contentSearchResponse == null || contentSearchResponse.getData() == null) {
                     return new ResultWrapper<>(0, new ArrayList<>()); // 无数据仍返回成功状态码
                 }
@@ -244,7 +262,21 @@ public class SpiderApiService {
                 for (SpiderHKVideoSearchItem item : contentSearchResponse.getData()) {
                     videoDetailItems.add(searchContentVideoDetail(item.getVid())); // 嵌套调用也可复用重试逻辑
                 }
-                return new ResultWrapper<>(0, videoDetailItems);
+                List<SpiderMediaItem> result = new ArrayList<>();
+                for (SpiderHKVideoDetailItem videoItem : videoDetailItems) {
+                    if (CollectionUtil.isEmpty(videoItem.getVideoURLList())) {
+                        continue;
+                    }
+                    SpiderHKVideoDetailItem.VideoURLList videoURLList = videoItem.getVideoURLList().get(0);
+                    SpiderMediaItem mediaItem = new SpiderMediaItem();
+                    mediaItem.setUrl(videoURLList.getVideoURL());
+                    if (Objects.nonNull(videoURLList.getVideoDuration())) {
+                        mediaItem.setDuration(videoURLList.getVideoDuration().intValue());
+                    }
+                    mediaItem.setTitle(videoItem.getTitle());
+                    result.add(mediaItem);
+                }
+                return new ResultWrapper<>(0, result);
             } catch (IOException e) {
                 log.error("搜索内容视频请求异常", e);
                 throw e;
@@ -280,7 +312,8 @@ public class SpiderApiService {
 
                 // 3. 解析数据并返回成功结果
                 SpiderContentSearchResponse<SpiderHKVideoDetailItem> contentSearchResponse =
-                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoDetailItem>>() {});
+                        JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoDetailItem>>() {
+                        });
                 SpiderHKVideoDetailItem data = (contentSearchResponse != null && !contentSearchResponse.getData().isEmpty())
                         ? contentSearchResponse.getData().get(0)
                         : null;

+ 15 - 20
core/src/main/java/com/tzld/supply/job/ContentMediaSearchJob.java

@@ -12,8 +12,7 @@ import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMapper;
 import com.tzld.supply.dao.mapper.supply.spider.SpiderContentMediaMapper;
 import com.tzld.supply.dao.mapper.supply.spider.ext.SpiderMapperExt;
 import com.tzld.supply.model.entity.AlgFaceRecognizeResult;
-import com.tzld.supply.model.entity.SpiderBaiduImageItem;
-import com.tzld.supply.model.entity.SpiderHKVideoDetailItem;
+import com.tzld.supply.model.entity.SpiderMediaItem;
 import com.tzld.supply.model.po.supply.spider.SpiderContent;
 import com.tzld.supply.model.po.supply.spider.SpiderContentMedia;
 import com.tzld.supply.model.po.supply.spider.SpiderContentMediaExample;
@@ -86,23 +85,23 @@ public class ContentMediaSearchJob {
     }
 
     private void imageMediaSearch(Long contentId, String keyword) {
-        List<SpiderBaiduImageItem> dataItems = spiderApiService.searchContentImage(keyword);
+        List<SpiderMediaItem> dataItems = spiderApiService.searchContentImage(keyword);
         if (CollectionUtil.isEmpty(dataItems)) {
             return;
         }
         List<SpiderContentMedia> saveList = new ArrayList<>();
         Long now = System.currentTimeMillis();
-        for (SpiderBaiduImageItem dataItem : dataItems) {
+        for (SpiderMediaItem mediaItem : dataItems) {
             // 转存到OSS
             String fileName = String.format("supply/spider/image/%s_%d.jpg", contentId, System.currentTimeMillis());
-            String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, dataItem.getUrl(), "image/jpeg");
+            String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, mediaItem.getUrl(), "image/jpeg");
             if (StringUtils.isBlank(fileUrl)) {
-                log.warn("图片转存OSS失败,URL: {}", dataItem.getUrl());
+                log.warn("图片转存OSS失败,URL: {}", mediaItem.getUrl());
                 continue;
             }
             SpiderContentMedia media = buildMedia(contentId, SpiderContentMediaTypeEnum.IMAGE.getMsg(),
-                    "百度图片", dataItem.getUrl(), null,
-                    dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)), fileUrl,
+                    "百度图片", mediaItem.getUrl(), null,
+                    mediaItem.getTitle().substring(0, Math.min(mediaItem.getTitle().length(), 100)), fileUrl,
                     SpiderContentMediaStatusEnum.WAITING.getCode(), now);
             saveList.add(media);
         }
@@ -176,33 +175,29 @@ public class ContentMediaSearchJob {
     }
 
     private void videoMediaSearch(Long contentId, String keyword) {
-        List<SpiderHKVideoDetailItem> dataItems = spiderApiService.searchContentVideo(keyword);
+        List<SpiderMediaItem> dataItems = spiderApiService.searchContentVideo(keyword);
         if (CollectionUtil.isEmpty(dataItems)) {
             return;
         }
         List<SpiderContentMedia> saveList = new ArrayList<>();
         Long now = System.currentTimeMillis();
-        for (SpiderHKVideoDetailItem dataItem : dataItems) {
-            if (CollectionUtil.isEmpty(dataItem.getVideoURLList())) {
-                continue;
-            }
-            SpiderHKVideoDetailItem.VideoURLList videoURLList = dataItem.getVideoURLList().get(0);
+        for (SpiderMediaItem mediaItem : dataItems) {
             // 时长过滤
-            if (videoURLList.getVideoDuration() > 300 || videoURLList.getVideoDuration() < 10) {
+            if (mediaItem.getDuration() > 300 || mediaItem.getDuration() < 10) {
                 continue;
             }
-            if (StringUtils.isBlank(videoURLList.getVideoURL())) {
+            if (StringUtils.isBlank(mediaItem.getUrl())) {
                 continue;
             }
             String fileName = String.format("supply/spider/video/%s_%d.mp4", contentId, System.currentTimeMillis());
-            String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, videoURLList.getVideoURL(), "video/mp4");
+            String fileUrl = AliOssFileTool.downloadAndSaveInOSS(fileName, mediaItem.getUrl(), "video/mp4");
             if (StringUtils.isBlank(fileUrl)) {
-                log.warn("视频转存OSS失败,URL: {}", videoURLList.getVideoURL());
+                log.warn("视频转存OSS失败,URL: {}", mediaItem.getUrl());
                 continue;
             }
             SpiderContentMedia media = buildMedia(contentId, SpiderContentMediaTypeEnum.VIDEO.getMsg(),
-                    "好看视频", videoURLList.getVideoURL(), (int) videoURLList.getVideoDuration() * 1000,
-                    dataItem.getTitle().substring(0, Math.min(dataItem.getTitle().length(), 100)), fileUrl,
+                    "好看视频", mediaItem.getUrl(), mediaItem.getDuration() * 1000,
+                    mediaItem.getTitle().substring(0, Math.min(mediaItem.getTitle().length(), 100)), fileUrl,
                     SpiderContentMediaStatusEnum.PASSED.getCode(), now);
             saveList.add(media);
         }

+ 7 - 7
core/src/main/java/com/tzld/supply/model/entity/SpiderHKVideoDetailItem.java

@@ -6,7 +6,7 @@ import java.util.List;
 
 @Data
 public class SpiderHKVideoDetailItem {
-    private long channel;
+    private Long channel;
     private String channelContentID;
     private String contentLink;
     private Object wxSn;
@@ -28,26 +28,26 @@ public class SpiderHKVideoDetailItem {
     private Object itemIndex;
     private Object viewCount;
     private Object playCount;
-    private long likeCount;
+    private Long likeCount;
     private Object collectCount;
-    private long commentCount;
+    private Long commentCount;
     private Object shareCount;
     private Object lookingCount;
-    private long publishTimestamp;
+    private Long publishTimestamp;
     private Object modifyTimestamp;
-    private long updateTimestamp;
+    private Long updateTimestamp;
 
 
     @Data
     public static class ImageURLList {
-        private long imageType;
+        private Long imageType;
         private String imageURL;
     }
 
     @Data
     public static class VideoURLList {
         private String videoURL;
-        private long videoDuration;
+        private Long videoDuration;
     }
 }
 

+ 0 - 1
core/src/main/java/com/tzld/supply/model/entity/SpiderMediaItem.java

@@ -6,6 +6,5 @@ import lombok.Data;
 public class SpiderMediaItem {
     private String title;        // 媒体标题
     private String url;          // 媒体URL
-    private String sourceSite;   // 来源站点
     private Integer duration;    // 视频时长(秒),图片为null
 }