瀏覽代碼

爬虫增加重试

wangyunpeng 2 周之前
父節點
當前提交
123d7bff64
共有 1 個文件被更改,包括 179 次插入103 次删除
  1. 179 103
      core/src/main/java/com/tzld/supply/api/SpiderApiService.java

+ 179 - 103
core/src/main/java/com/tzld/supply/api/SpiderApiService.java

@@ -3,6 +3,7 @@ package com.tzld.supply.api;
 import com.alibaba.fastjson.JSONObject;
 import com.alibaba.fastjson.TypeReference;
 import com.tzld.supply.model.entity.*;
+import lombok.Getter;
 import lombok.extern.slf4j.Slf4j;
 import okhttp3.*;
 import org.springframework.beans.factory.annotation.Value;
@@ -12,6 +13,7 @@ import javax.annotation.PostConstruct;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.concurrent.Callable;
 import java.util.concurrent.TimeUnit;
 
 @Slf4j
@@ -22,6 +24,10 @@ public class SpiderApiService {
 
     @Value("${spider.api.base-url:http://crawapi.piaoquantv.com}")
     private String baseUrl;
+    @Value("${spider.retry.max:5}")
+    private int maxRetries;
+    @Value("${spider.retry.delay-ms:1000}")
+    private long delayMs;
 
     @PostConstruct
     public void init() {
@@ -32,6 +38,56 @@ public class SpiderApiService {
                 .build();
     }
 
+    /**
+     * Immutable holder pairing an API status code with the decoded payload.
+     */
+    @Getter
+    private static class ResultWrapper<T> {
+        private final int code; // status code: 0 means success, anything else is a failure
+        private final T data;   // business payload; may be null
+
+        public ResultWrapper(int code, T data) {
+            this.code = code;
+            this.data = data;
+        }
+    }
+
+    /**
+     * Runs the callback until it reports success or the attempt budget is used up.
+     * <p>An attempt fails when the wrapper's code is non-zero or the callback throws;
+     * failed attempts are logged and the next attempt starts after a pause of {@code delayMs}.
+     * @param callable business callback returning a ResultWrapper (status code plus data)
+     * @param maxRetries total number of attempts; values below 1 are treated as 1
+     * @param delayMs pause before each retry, in milliseconds
+     * @return data of the first attempt whose code is 0; null when every attempt fails or the thread is interrupted
+     */
+    private <T> T executeWithRetry(Callable<ResultWrapper<T>> callable, int maxRetries, long delayMs) {
+        // A misconfigured budget (0 or negative) must not silently skip the call altogether.
+        int attempts = Math.max(1, maxRetries);
+        for (int attempt = 1; attempt <= attempts; attempt++) {
+            try {
+                if (attempt > 1) {
+                    TimeUnit.MILLISECONDS.sleep(delayMs); // pause only between attempts, never after the last
+                }
+                ResultWrapper<T> result = callable.call();
+                if (result.getCode() == 0) {
+                    return result.getData();
+                }
+                log.warn("业务执行失败,状态码:{},重试次数:{}", result.getCode(), attempt);
+            } catch (InterruptedException ie) {
+                // Interruption (while sleeping or inside the callback) ends the loop instead of being retried.
+                log.error("重试延迟被中断:{}", ie.getMessage(), ie);
+                Thread.currentThread().interrupt(); // restore the interrupt flag for callers
+                return null;
+            } catch (Exception e) {
+                log.error("执行异常,重试次数:{},异常信息:{}", attempt, e.getMessage(), e);
+            }
+        }
+        // Every attempt failed: report it and fall back to null.
+        log.error("达到最大重试次数({}次),执行失败", attempts);
+        return null;
+    }
+
     /**
      * 查询内容热榜请求
      * @param sortType 排序类型
@@ -71,44 +127,40 @@ public class SpiderApiService {
      * @return ContentSearchResponse.DataItem 响应对象
      */
     public List<ContentSearchResponse.DataItem> searchContentDetail(String title) {
-        // 搜索内容详情
-        String url = baseUrl + "/crawler/bai_du/keyword";
-        JSONObject param = new JSONObject();
-        param.put("keyword", title);
-        RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
-        Request request = new Request.Builder()
-                .url(url)
-                .post(body)
-                .build();
-        int maxRetries = 3; // 最大重试次数
-        int retryCount = 0; // 当前重试计数
-        while (retryCount < maxRetries) {
-            try {
-                Response response = client.newCall(request).execute();
-                if (response.isSuccessful()) {
-                    // 请求成功,解析响应并返回
-                    String responseBody = response.body().string();
-                    // 将 JSON 响应映射到 ContentSearchResponse 对象
-                    JSONObject jsonObject = JSONObject.parseObject(responseBody);
-                    ContentSearchResponse contentSearchResponse = jsonObject.getObject("data", ContentSearchResponse.class);
-                    if (contentSearchResponse != null && contentSearchResponse.getData() != null) {
-                        return contentSearchResponse.getData();
-                    }
+        // Delegate to the shared retry helper so retry handling lives in one place
+        return executeWithRetry(() -> {
+            // 1. Build the request
+            String url = baseUrl + "/crawler/bai_du/keyword";
+            JSONObject param = new JSONObject();
+            param.put("keyword", title);
+            RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
+            Request request = new Request.Builder()
+                    .url(url)
+                    .post(body)
+                    .build();
+
+            // 2. Execute the request and parse the response (try-with-resources closes the response)
+            try (Response response = client.newCall(request).execute()) {
+                if (!response.isSuccessful()) {
+                    return new ResultWrapper<>(-1, null); // non-2xx HTTP status: report failure so the helper retries
                 }
-            } catch (IOException e) {
-                log.error("搜索内容详情失败: {}", e.getMessage(), e);
-            }
-            retryCount++;
-            // 如果还有重试次数,添加短暂延迟后重试
-            if (retryCount < maxRetries) {
-                try {
-                    TimeUnit.MILLISECONDS.sleep(2000); // 2秒延迟
-                } catch (InterruptedException ie) {
-                    log.error("重试被中断: {}", ie.getMessage(), ie);
+
+                String responseBody = response.body().string();
+                JSONObject jsonObject = JSONObject.parseObject(responseBody);
+                int code = jsonObject.getIntValue("code"); // business status code from the payload
+                if (code != 0) {
+                    return new ResultWrapper<>(code, null); // non-zero business code: report failure so the helper retries
                 }
+
+                // 3. Decode the data and return a success result
+                ContentSearchResponse contentSearchResponse = jsonObject.getObject("data", ContentSearchResponse.class);
+                List<ContentSearchResponse.DataItem> data = (contentSearchResponse != null) ? contentSearchResponse.getData() : null;
+                return new ResultWrapper<>(0, data); // code 0 marks success; data may be null
+            } catch (IOException e) {
+                log.error("搜索内容详情请求异常", e);
+                throw e; // rethrow so the retry helper catches it and retries
             }
-        }
-        return null;
+        }, maxRetries, delayMs); // class-level retry settings (shared configuration)
     }
 
     /**
@@ -117,102 +169,126 @@ public class SpiderApiService {
      * @return ContentSearchResponse.DataItem 响应对象
      */
     public List<SpiderBaiduImageItem> searchContentImage(String keyword) {
-        // 搜索内容图片
-        String url = baseUrl + "/crawler/bai_du/search_image";
-        JSONObject param = new JSONObject();
-        param.put("keyword", keyword);
-        RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
-        Request request = new Request.Builder()
-                .url(url)
-                .post(body)
-                .build();
-        try {
-            Response response = client.newCall(request).execute();
-            if (response.isSuccessful()) {
-                // 请求成功,解析响应并返回
+        // Delegate to the shared retry helper, using the configured maxRetries and delayMs
+        return executeWithRetry(() -> {
+            // 1. Build the request
+            String url = baseUrl + "/crawler/bai_du/search_image";
+            JSONObject param = new JSONObject();
+            param.put("keyword", keyword);
+            RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
+            Request request = new Request.Builder()
+                    .url(url)
+                    .post(body)
+                    .build();
+
+            // 2. Execute the request and parse the response
+            try (Response response = client.newCall(request).execute()) {
+                if (!response.isSuccessful()) {
+                    return new ResultWrapper<>(-1, null); // non-2xx HTTP status: return a failure code
+                }
+
                 String responseBody = response.body().string();
-                // 将 JSON 响应映射到 ContentSearchResponse 对象
                 JSONObject jsonObject = JSONObject.parseObject(responseBody);
+                int code = jsonObject.getIntValue("code"); // assumes the API response carries a "code" field
+                if (code != 0) {
+                    return new ResultWrapper<>(code, null); // non-zero business code: return a failure code
+                }
 
+                // 3. Decode the data and return a success result
                 SpiderContentSearchResponse<SpiderBaiduImageItem> contentSearchResponse =
                         JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderBaiduImageItem>>() {});
-                if (contentSearchResponse != null && contentSearchResponse.getData() != null) {
-                    return contentSearchResponse.getData();
-                }
+                List<SpiderBaiduImageItem> data = contentSearchResponse != null ? contentSearchResponse.getData() : null;
+                return new ResultWrapper<>(0, data); // code 0 marks success
+            } catch (IOException e) {
+                log.error("搜索内容图片请求异常", e);
+                throw e; // rethrow so the retry helper catches it and retries
             }
-        } catch (IOException e) {
-            log.error("搜索内容图片失败: {}", e.getMessage(), e);
-        }
-
-        return null;
+        }, maxRetries, delayMs); // retry settings come from the injected maxRetries / delayMs fields
     }
 
     /**
      * 搜索内容视频请求
      * @param keyword 搜索关键词
-     * @return List<SpiderHKVideoSearchItem> 响应对象
+     * @return List<SpiderHKVideoDetailItem> 响应对象
      */
     public List<SpiderHKVideoDetailItem> searchContentVideo(String keyword) {
-        // 搜索内容视频
-        String url = baseUrl + "/crawler/hao_kan_shi_pin/keyword";
-        JSONObject param = new JSONObject();
-        param.put("keyword", keyword);
-        RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
-        Request request = new Request.Builder()
-                .url(url)
-                .post(body)
-                .build();
-        try {
-            Response response = client.newCall(request).execute();
-            if (response.isSuccessful()) {
-                // 请求成功,解析响应并返回
+        return executeWithRetry(() -> {
+            String url = baseUrl + "/crawler/hao_kan_shi_pin/keyword";
+            JSONObject param = new JSONObject();
+            param.put("keyword", keyword);
+            RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
+            Request request = new Request.Builder()
+                    .url(url)
+                    .post(body)
+                    .build();
+
+            try (Response response = client.newCall(request).execute()) {
+                if (!response.isSuccessful()) {
+                    return new ResultWrapper<>(-1, null);
+                }
+
                 String responseBody = response.body().string();
-                // 将 JSON 响应映射到 ContentSearchResponse 对象
                 JSONObject jsonObject = JSONObject.parseObject(responseBody);
+                int code = jsonObject.getIntValue("code");
+                if (code != 0) {
+                    return new ResultWrapper<>(code, null);
+                }
 
                 SpiderContentSearchResponse<SpiderHKVideoSearchItem> contentSearchResponse =
                         JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoSearchItem>>() {});
-                if (contentSearchResponse != null && contentSearchResponse.getData() != null) {
-                    List<SpiderHKVideoDetailItem> videoDetailItems = new ArrayList<>();
-                    for (SpiderHKVideoSearchItem item : contentSearchResponse.getData()) {
-                        videoDetailItems.add(searchContentVideoDetail(item.getVid()));
-                    }
-                    return videoDetailItems;
+                if (contentSearchResponse == null || contentSearchResponse.getData() == null) {
+                    return new ResultWrapper<>(0, new ArrayList<>()); // no data still counts as success (empty list)
                 }
+
+                List<SpiderHKVideoDetailItem> videoDetailItems = new ArrayList<>();
+                for (SpiderHKVideoSearchItem item : contentSearchResponse.getData()) {
+                    videoDetailItems.add(searchContentVideoDetail(item.getVid())); // nested call goes through the retry helper too; a null result is added as-is
+                }
+                return new ResultWrapper<>(0, videoDetailItems);
+            } catch (IOException e) {
+                log.error("搜索内容视频请求异常", e);
+                throw e;
             }
-        } catch (IOException e) {
-            log.error("搜索内容图片失败: {}", e.getMessage(), e);
-        }
-        return null;
+        }, maxRetries, delayMs);
     }
 
     public SpiderHKVideoDetailItem searchContentVideoDetail(String videoId) {
-        // 搜索内容视频详情
-        String url = baseUrl + "/crawler/hao_kan_shi_pin/detail";
-        JSONObject param = new JSONObject();
-        param.put("content_id", videoId);
-        RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
-        Request request = new Request.Builder()
-                .url(url)
-                .post(body)
-                .build();
-        try {
-            Response response = client.newCall(request).execute();
-            if (response.isSuccessful()) {
-                // 请求成功,解析响应并返回
+        // Delegate to the shared retry helper, which also retries when code != 0
+        return executeWithRetry(() -> {
+            // 1. Build the request
+            String url = baseUrl + "/crawler/hao_kan_shi_pin/detail";
+            JSONObject param = new JSONObject();
+            param.put("content_id", videoId);
+            RequestBody body = RequestBody.create(MediaType.parse("application/json; charset=utf-8"), param.toJSONString());
+            Request request = new Request.Builder()
+                    .url(url)
+                    .post(body)
+                    .build();
+
+            // 2. Execute the request and parse the response (try-with-resources closes the response)
+            try (Response response = client.newCall(request).execute()) {
+                if (!response.isSuccessful()) {
+                    return new ResultWrapper<>(-1, null); // non-2xx HTTP status: return a failure code
+                }
+
                 String responseBody = response.body().string();
-                // 将 JSON 响应映射到 ContentSearchResponse 对象
                 JSONObject jsonObject = JSONObject.parseObject(responseBody);
+                int code = jsonObject.getIntValue("code"); // business status code from the payload
+                if (code != 0) {
+                    return new ResultWrapper<>(code, null); // non-zero business code: report failure so the helper retries
+                }
 
+                // 3. Decode the data; a missing or empty list yields null instead of throwing
                 SpiderContentSearchResponse<SpiderHKVideoDetailItem> contentSearchResponse =
                         JSONObject.parseObject(jsonObject.getString("data"), new TypeReference<SpiderContentSearchResponse<SpiderHKVideoDetailItem>>() {});
-                if (contentSearchResponse != null && contentSearchResponse.getData() != null) {
-                    return contentSearchResponse.getData().get(0);
-                }
+                SpiderHKVideoDetailItem data = (contentSearchResponse != null && contentSearchResponse.getData() != null && !contentSearchResponse.getData().isEmpty())
+                        ? contentSearchResponse.getData().get(0)
+                        : null;
+                return new ResultWrapper<>(0, data); // code 0 marks success; data may be null
+            } catch (IOException e) {
+                log.error("搜索内容视频详情请求异常", e);
+                throw e; // rethrow so the retry helper catches it and retries
             }
-        } catch (IOException e) {
-            log.error("搜索内容视频详情失败: {}", e.getMessage(), e);
-        }
-        return null;
+        }, maxRetries, delayMs); // class-level retry settings (injected maxRetries / delayMs)
     }
 }