|
|
@@ -26,33 +26,33 @@ import java.util.concurrent.atomic.AtomicInteger;
|
|
|
|
|
|
/**
|
|
|
* FeatureV2 gRPC 客户端
|
|
|
- *
|
|
|
+ *
|
|
|
* 优化说明:
|
|
|
* 1. 增加异常处理:捕获 StatusRuntimeException,避免错误向上传播
|
|
|
* 2. 增加重试机制:对 UNAVAILABLE 等可恢复错误自动重试(10ms 延迟)
|
|
|
* 3. 增加降级处理:重试失败后返回空结果,保证服务稳定
|
|
|
* 4. 增加详细日志:记录错误详情,便于问题排查
|
|
|
- *
|
|
|
+ *
|
|
|
* @author dyp
|
|
|
*/
|
|
|
@Component
|
|
|
@Slf4j
|
|
|
public class FeatureV2Client {
|
|
|
-
|
|
|
+
|
|
|
@GrpcClient("recommend-feature")
|
|
|
private FeatureV2ServiceGrpc.FeatureV2ServiceBlockingStub client;
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* 最大重试次数
|
|
|
* 说明:对于网络连接问题,重试可以触发连接重建
|
|
|
*/
|
|
|
private static final int MAX_RETRY_ATTEMPTS = 2;
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* 重试延迟(毫秒)
|
|
|
* 说明:10ms 快速重试,给连接重建预留时间
|
|
|
*/
|
|
|
- private static final long RETRY_DELAY_MS = 50;
|
|
|
+ private static final long RETRY_DELAY_MS = 10;
|
|
|
|
|
|
/**
|
|
|
* 单次 gRPC 请求的拆包大小(按 key 数量)
|
|
|
@@ -92,7 +92,7 @@ public class FeatureV2Client {
|
|
|
|
|
|
/**
|
|
|
* 批量获取特征数据
|
|
|
- *
|
|
|
+ *
|
|
|
* @param protos 特征请求列表
|
|
|
* @return 特征数据 Map,key 为 uniqueKey,value 为特征值 JSON 字符串
|
|
|
*/
|
|
|
@@ -142,61 +142,61 @@ public class FeatureV2Client {
|
|
|
System.currentTimeMillis() - startTime);
|
|
|
return merged;
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* 带重试的特征获取方法
|
|
|
- *
|
|
|
+ *
|
|
|
* @param protos 特征请求列表
|
|
|
* @param attemptCount 当前重试次数(从 0 开始)
|
|
|
* @return 特征数据 Map
|
|
|
*/
|
|
|
private Map<String, String> multiGetFeatureWithRetry(List<FeatureKeyProto> protos, int attemptCount) {
|
|
|
log.warn("multiGetFeatureWithRetry: start, attempt={}, protos.size={}", attemptCount, protos.size());
|
|
|
-
|
|
|
+
|
|
|
MultiGetFeatureRequest request = MultiGetFeatureRequest.newBuilder()
|
|
|
.addAllFeatureKey(protos)
|
|
|
.build();
|
|
|
-
|
|
|
+
|
|
|
try {
|
|
|
// 调用 gRPC 服务,设置 3 秒超时
|
|
|
MultiGetFeatureResponse response = client
|
|
|
.withDeadlineAfter(3, TimeUnit.SECONDS)
|
|
|
.multiGetFeature(request);
|
|
|
-
|
|
|
+
|
|
|
// 响应为空或没有结果
|
|
|
if (response == null || !response.hasResult()) {
|
|
|
log.info("multiGetFeature grpc error: response is null or has no result, attempt={}", attemptCount);
|
|
|
return Collections.emptyMap();
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 业务错误码检查
|
|
|
if (response.getResult().getCode() != 1) {
|
|
|
log.info("multiGetFeature grpc code={}, msg={}, attempt={}", response.getResult().getCode(),
|
|
|
response.getResult().getMessage(), attemptCount);
|
|
|
return Collections.emptyMap();
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 特征数据为空
|
|
|
if (response.getFeatureCount() == 0) {
|
|
|
log.info("multiGetFeature no feature, attempt={}", attemptCount);
|
|
|
return Collections.emptyMap();
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 成功返回特征数据
|
|
|
return response.getFeatureMap();
|
|
|
-
|
|
|
+
|
|
|
} catch (StatusRuntimeException e) {
|
|
|
Status.Code code = e.getStatus().getCode();
|
|
|
String description = e.getStatus().getDescription();
|
|
|
-
|
|
|
+
|
|
|
// 记录详细的错误信息(使用 error 级别确保一定会输出)
|
|
|
- log.error("gRPC call failed: code={}, description={}, attempt={}/{}, protos.size={}, exception={}",
|
|
|
+ log.error("gRPC call failed: code={}, description={}, attempt={}/{}, protos.size={}, exception={}",
|
|
|
code, description, attemptCount + 1, MAX_RETRY_ATTEMPTS + 1, protos.size(), e.getClass().getName(), e);
|
|
|
-
|
|
|
+
|
|
|
// 判断是否应该重试
|
|
|
if (shouldRetry(code) && attemptCount < MAX_RETRY_ATTEMPTS) {
|
|
|
log.warn("Retrying gRPC call after {}ms, attempt={}/{}, reason={}", RETRY_DELAY_MS, attemptCount + 1, MAX_RETRY_ATTEMPTS, code);
|
|
|
-
|
|
|
+
|
|
|
// 等待一段时间后重试(给连接重建预留时间)
|
|
|
try {
|
|
|
Thread.sleep(RETRY_DELAY_MS);
|
|
|
@@ -204,26 +204,26 @@ public class FeatureV2Client {
|
|
|
Thread.currentThread().interrupt();
|
|
|
log.warn("Retry sleep interrupted", ie);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 递归调用,进行重试
|
|
|
return multiGetFeatureWithRetry(protos, attemptCount + 1);
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
// 重试失败或不可重试的错误,降级返回空结果
|
|
|
- log.error("gRPC call failed after {} attempts, returning empty result for graceful degradation. code={}",
|
|
|
+ log.error("gRPC call failed after {} attempts, returning empty result for graceful degradation. code={}",
|
|
|
attemptCount + 1, code);
|
|
|
return Collections.emptyMap();
|
|
|
} catch (Exception e) {
|
|
|
// 捕获所有其他异常(不应该发生,但为了安全起见)
|
|
|
- log.error("multiGetFeatureWithRetry: unexpected exception, attempt={}/{}, protos.size={}, exception={}",
|
|
|
+ log.error("multiGetFeatureWithRetry: unexpected exception, attempt={}/{}, protos.size={}, exception={}",
|
|
|
attemptCount + 1, MAX_RETRY_ATTEMPTS + 1, protos.size(), e.getClass().getName(), e);
|
|
|
return Collections.emptyMap();
|
|
|
}
|
|
|
}
|
|
|
-
|
|
|
+
|
|
|
/**
|
|
|
* 判断错误是否应该重试
|
|
|
- *
|
|
|
+ *
|
|
|
* @param code gRPC 状态码
|
|
|
* @return true 表示应该重试,false 表示不应该重试
|
|
|
*/
|
|
|
@@ -232,8 +232,8 @@ public class FeatureV2Client {
|
|
|
// DEADLINE_EXCEEDED: 超时 - 应该重试
|
|
|
// RESOURCE_EXHAUSTED: 资源耗尽(如连接池满)- 应该重试
|
|
|
// CANCELLED: 请求被取消(如服务端处理失败、连接中断)- 应该重试
|
|
|
- return code == Status.Code.UNAVAILABLE
|
|
|
- || code == Status.Code.DEADLINE_EXCEEDED
|
|
|
+ return code == Status.Code.UNAVAILABLE
|
|
|
+ || code == Status.Code.DEADLINE_EXCEEDED
|
|
|
|| code == Status.Code.RESOURCE_EXHAUSTED
|
|
|
|| code == Status.Code.CANCELLED;
|
|
|
}
|