Sentinel、Hystrix、Resilience4j 服务容错保护
在微服务架构中,服务间相互依赖,一个服务的故障可能沿调用链逐级扩散,最终引发雪崩效应。
熔断降级机制通过快速失败和服务降级,保护系统稳定性。
熔断(Circuit Breaker):当服务故障率达到阈值时,自动切断请求,快速失败。类似电路保险丝。
降级(Fallback):服务不可用时,返回默认值或调用备用方法,保证基本功能可用。
限流(Rate Limiting):限制请求速率,防止系统过载。
隔离(Isolation):使用线程池或信号量隔离资源,防止故障扩散。
熔断器三种状态:
1. CLOSED(关闭)
- 正常状态,请求正常通过
- 统计失败率,达到阈值后转为 OPEN
2. OPEN(打开)
- 熔断状态,请求直接失败,不调用服务
- 等待一段时间后转为 HALF_OPEN
3. HALF_OPEN(半开)
- 尝试恢复,允许部分请求通过
- 成功则转为 CLOSED,失败则转回 OPEN
状态转换:
CLOSED --[失败率达到阈值]--> OPEN
OPEN --[等待时间到]--> HALF_OPEN
HALF_OPEN --[请求成功]--> CLOSED
HALF_OPEN --[请求失败]--> OPEN
Sentinel 是阿里巴巴开源的流量控制组件,功能强大且易用。
<!-- pom.xml 依赖 -->
<dependency>
<groupId>com.alibaba.cloud</groupId>
<artifactId>spring-cloud-starter-alibaba-sentinel</artifactId>
<version>2.2.9.RELEASE</version>
</dependency>
# application.yml configuration
spring:
  cloud:
    sentinel:
      transport:
        # Address of the Sentinel dashboard
        dashboard: localhost:8080
        # Port used to communicate with the dashboard
        port: 8719
      # Eager loading
      eager: true
# 下载并启动 Sentinel 控制台
java -jar sentinel-dashboard-1.8.6.jar --server.port=8080
/**
 * REST endpoints under {@code /api/users}, protected by a Sentinel resource.
 *
 * <p>NOTE(review): {@code userService} is used below but is not declared in
 * this snippet; presumably it is an injected collaborator. Confirm against the
 * full class.
 */
@RestController
@RequestMapping("/api/users")
public class UserController {

    /**
     * Returns the user with the given id.
     *
     * <p>The call is declared as the Sentinel resource {@code getUserById};
     * {@link #handleBlock} is named as its block handler and
     * {@link #handleFallback} as its fallback.
     *
     * @param id the user id taken from the request path
     * @return the user returned by {@code userService}
     */
    @GetMapping("/{id}")
    @SentinelResource(
        value = "getUserById",
        blockHandler = "handleBlock",
        fallback = "handleFallback"
    )
    public User getUserById(@PathVariable Long id) {
        return userService.getUserById(id);
    }

    /**
     * Rate-limit handler: answers with a placeholder user carrying a
     * "system busy" message.
     *
     * @param id the id originally requested
     * @param ex the block exception (unused)
     * @return a placeholder user with the requested id
     */
    public User handleBlock(Long id, BlockException ex) {
        return placeholderUser(id, "系统繁忙,请稍后重试");
    }

    /**
     * Fallback handler: answers with a placeholder user carrying a
     * "service error, default data" message.
     *
     * @param id the id originally requested
     * @param ex the failure (unused)
     * @return a placeholder user with the requested id
     */
    public User handleFallback(Long id, Throwable ex) {
        return placeholderUser(id, "服务异常,返回默认数据");
    }

    /** Builds a user that carries only the given id and display name. */
    private User placeholderUser(Long id, String name) {
        User placeholder = new User();
        placeholder.setId(id);
        placeholder.setName(name);
        return placeholder;
    }
}
/**
 * Configures the flow-control rules programmatically.
 *
 * <p>Loads a single rule for the {@code getUserById} resource: QPS-based,
 * threshold 10, default control behavior.
 */
@PostConstruct
public void initFlowRules() {
    FlowRule userLookupLimit = new FlowRule();
    userLookupLimit.setResource("getUserById");
    // Limit by QPS.
    userLookupLimit.setGrade(RuleConstant.FLOW_GRADE_QPS);
    // Threshold of 10.
    userLookupLimit.setCount(10);
    // Fail fast.
    userLookupLimit.setControlBehavior(RuleConstant.CONTROL_BEHAVIOR_DEFAULT);

    List<FlowRule> flowRules = new ArrayList<>();
    flowRules.add(userLookupLimit);
    FlowRuleManager.loadRules(flowRules);
}
/**
 * Configures the circuit-breaking (degrade) rules.
 *
 * <p>Loads two rules: a slow-call-ratio rule for {@code getUserById} and an
 * exception-ratio rule for {@code createOrder}.
 */
@PostConstruct
public void initDegradeRules() {
    // Slow-call-ratio strategy.
    DegradeRule slowCallRule = new DegradeRule();
    slowCallRule.setResource("getUserById");
    slowCallRule.setGrade(RuleConstant.DEGRADE_GRADE_RT);
    // A call slower than 500 ms counts as a slow call.
    slowCallRule.setCount(500);
    // Time window of 10 seconds.
    // NOTE(review): Sentinel documents timeWindow as how long the breaker
    // stays open, not as a statistics window; confirm the intent.
    slowCallRule.setTimeWindow(10);
    // Minimum number of requests.
    slowCallRule.setMinRequestAmount(5);
    // Slow-call ratio threshold of 50%.
    slowCallRule.setSlowRatioThreshold(0.5);

    // Exception-ratio strategy.
    DegradeRule exceptionRatioRule = new DegradeRule();
    exceptionRatioRule.setResource("createOrder");
    exceptionRatioRule.setGrade(RuleConstant.DEGRADE_GRADE_EXCEPTION_RATIO);
    // Exception ratio of 50%.
    exceptionRatioRule.setCount(0.5);
    exceptionRatioRule.setTimeWindow(10);
    exceptionRatioRule.setMinRequestAmount(5);

    List<DegradeRule> degradeRules = new ArrayList<>();
    degradeRules.add(slowCallRule);
    degradeRules.add(exceptionRatioRule);
    DegradeRuleManager.loadRules(degradeRules);
}
/**
 * Returns the product with the given id, declared as the Sentinel resource
 * {@code getProduct}.
 *
 * <p>NOTE(review): the block handler {@code handleBlock} is not visible in
 * this snippet; presumably it must return {@code Product} to match this
 * method. Confirm against the enclosing class.
 *
 * @param id the product id taken from the request path
 * @return the product returned by {@code productService}
 */
@GetMapping("/products/{id}")
@SentinelResource(
    value = "getProduct",
    blockHandler = "handleBlock"
)
public Product getProduct(@PathVariable Long id) {
    Product product = productService.getById(id);
    return product;
}
/**
 * Configures hot-parameter flow control for the {@code getProduct} resource.
 *
 * <p>The rule applies to the first parameter with a threshold of 5, and
 * carries one per-value exception that gives product id 1 a threshold of 10.
 */
@PostConstruct
public void initParamFlowRules() {
    // Exception item for one specific parameter value: product id 1.
    ParamFlowItem hotProduct = new ParamFlowItem();
    hotProduct.setObject("1");
    hotProduct.setClassType(Long.class.getName());
    // Special threshold for that value.
    hotProduct.setCount(10);

    ParamFlowRule productRule = new ParamFlowRule();
    productRule.setResource("getProduct");
    // Limiting mode: QPS.
    productRule.setGrade(RuleConstant.FLOW_GRADE_QPS);
    // Parameter index (the first parameter).
    productRule.setParamIdx(0);
    // Threshold.
    productRule.setCount(5);
    productRule.setParamFlowItemList(Collections.singletonList(hotProduct));

    ParamFlowRuleManager.loadRules(Collections.singletonList(productRule));
}
Resilience4j 是轻量级的容错库,Spring Cloud 官方推荐。
<!-- pom.xml 依赖 -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-circuitbreaker-resilience4j</artifactId>
</dependency>
# application.yml configuration
resilience4j:
  circuitbreaker:
    instances:
      userService:
        # Sliding window size
        slidingWindowSize: 10
        # Failure-rate threshold (percent)
        failureRateThreshold: 50
        # Slow-call rate threshold (percent)
        slowCallRateThreshold: 50
        # Calls that take longer than this count as slow
        slowCallDurationThreshold: 2s
        # Wait time before moving OPEN -> HALF_OPEN
        waitDurationInOpenState: 10s
        # Number of calls permitted while HALF_OPEN
        permittedNumberOfCallsInHalfOpenState: 3
        # Move from OPEN to HALF_OPEN automatically
        automaticTransitionFromOpenToHalfOpenEnabled: true
  ratelimiter:
    instances:
      userService:
        # Rate-limit refresh period
        limitRefreshPeriod: 1s
        # Requests permitted per period
        limitForPeriod: 10
        # How long a caller waits for permission
        timeoutDuration: 0s
  retry:
    instances:
      userService:
        # Maximum number of attempts (the initial call counts as the first attempt)
        maxAttempts: 3
        # Wait time between attempts
        waitDuration: 1s
        # Exceptions that trigger a retry
        retryExceptions:
          - java.io.IOException
          - java.util.concurrent.TimeoutException
/**
 * User lookup that wraps the remote client call in a circuit breaker
 * obtained from {@code CircuitBreakerFactory}.
 */
@Service
public class UserService {

    @Autowired
    private UserServiceClient userServiceClient;

    @Autowired
    private CircuitBreakerFactory circuitBreakerFactory;

    /**
     * Fetches a user through the {@code userService} circuit breaker.
     *
     * @param id the user id
     * @return the user from the client, or the fallback user produced by the
     *         second lambda passed to {@code run}
     */
    public User getUserById(Long id) {
        return circuitBreakerFactory
            .create("userService")
            .run(
                () -> userServiceClient.getUserById(id),
                failure -> getFallbackUser(id)
            );
    }

    /** Builds the default user returned by the fallback path. */
    private User getFallbackUser(Long id) {
        User placeholder = new User();
        placeholder.setId(id);
        placeholder.setName("默认用户");
        return placeholder;
    }
}
/**
 * Annotation-based variant: circuit breaker, rate limiter and retry are all
 * declared on the method under the instance name {@code orderService}.
 *
 * <p>NOTE(review): {@code orderServiceClient} is used below but is not
 * declared in this snippet; presumably it is an injected client. Confirm
 * against the full class.
 */
@Service
public class OrderService {

    /**
     * Creates an order through the remote order client.
     *
     * @param request the order to create
     * @return the order returned by the client
     */
    @CircuitBreaker(name = "orderService", fallbackMethod = "createOrderFallback")
    @RateLimiter(name = "orderService")
    @Retry(name = "orderService")
    public Order createOrder(OrderRequest request) {
        return orderServiceClient.createOrder(request);
    }

    /**
     * Fallback named by the {@code @CircuitBreaker} annotation: returns a
     * PENDING order with a "please retry later" message.
     *
     * @param request the original request (unused)
     * @param ex      the failure (unused)
     * @return a placeholder order
     */
    private Order createOrderFallback(OrderRequest request, Exception ex) {
        Order pendingOrder = new Order();
        pendingOrder.setStatus("PENDING");
        pendingOrder.setMessage("订单创建失败,请稍后重试");
        return pendingOrder;
    }
}
/**
 * Subscribes to events of the {@code userService} circuit breaker and prints
 * state transitions, errors and successes to standard output.
 */
@Component
public class CircuitBreakerEventListener {

    @Autowired
    private CircuitBreakerRegistry circuitBreakerRegistry;

    /** Registers the three event consumers on the circuit breaker's event publisher. */
    @PostConstruct
    public void init() {
        circuitBreakerRegistry.circuitBreaker("userService")
            .getEventPublisher()
            .onStateTransition(event -> {
                // Report the state the breaker left and the state it entered.
                String transition = String.format(
                    "熔断器状态变更: %s -> %s",
                    event.getStateTransition().getFromState(),
                    event.getStateTransition().getToState()
                );
                System.out.println(transition);
            })
            .onError(event ->
                System.out.println("请求失败: " + event.getThrowable().getMessage()))
            .onSuccess(event ->
                System.out.println("请求成功,耗时: " + event.getElapsedDuration().toMillis() + "ms"));
    }
}
// Hystrix 已停止维护,但仍被广泛使用
<!-- pom.xml 依赖 -->
<dependency>
<groupId>org.springframework.cloud</groupId>
<artifactId>spring-cloud-starter-netflix-hystrix</artifactId>
</dependency>
/**
 * Application entry point; {@code @EnableCircuitBreaker} is declared on the
 * boot class.
 */
@SpringBootApplication
@EnableCircuitBreaker
public class Application {

    /**
     * Starts the Spring application.
     *
     * @param args command-line arguments, passed through to Spring
     */
    public static void main(final String[] args) {
        SpringApplication.run(Application.class, args);
    }
}
/**
 * Hystrix variant of the user lookup.
 *
 * <p>NOTE(review): {@code userServiceClient} is used below but is not
 * declared in this snippet; presumably it is an injected client. Confirm
 * against the full class.
 */
@Service
public class UserService {

    /**
     * Fetches a user as a Hystrix command with {@link #getUserFallback} as the
     * fallback method.
     *
     * <p>Command properties set here: a 3000 ms execution timeout, a request
     * volume threshold of 10, an error threshold of 50 percent and a
     * 10000 ms sleep window.
     *
     * @param id the user id
     * @return the user returned by the client
     */
    @HystrixCommand(
        fallbackMethod = "getUserFallback",
        commandProperties = {
            @HystrixProperty(name = "execution.isolation.thread.timeoutInMilliseconds", value = "3000"),
            @HystrixProperty(name = "circuitBreaker.requestVolumeThreshold", value = "10"),
            @HystrixProperty(name = "circuitBreaker.errorThresholdPercentage", value = "50"),
            @HystrixProperty(name = "circuitBreaker.sleepWindowInMilliseconds", value = "10000")
        }
    )
    public User getUserById(Long id) {
        return userServiceClient.getUserById(id);
    }

    /**
     * Fallback: returns a placeholder user with the requested id.
     *
     * @param id the id originally requested
     * @return the degraded user
     */
    public User getUserFallback(Long id) {
        User degraded = new User();
        degraded.setId(id);
        degraded.setName("降级用户");
        return degraded;
    }
}
# Enable circuit breaking for Feign clients
feign:
  circuitbreaker:
    enabled: true
/**
 * Feign client for the {@code user-service} application, with
 * {@code UserServiceFallback} configured as its fallback.
 */
@FeignClient(fallback = UserServiceFallback.class, name = "user-service")
public interface UserServiceClient {

    /**
     * Issues {@code GET /api/users/{id}}.
     *
     * @param id the user id bound to the {@code id} path variable
     * @return the user returned by the remote service
     */
    @GetMapping("/api/users/{id}")
    User getUserById(@PathVariable(value = "id") Long id);
}
/**
 * Fallback implementation of {@code UserServiceClient}: answers with a
 * placeholder user instead of calling the remote service.
 */
@Component
public class UserServiceFallback implements UserServiceClient {

    /**
     * Returns a placeholder user with the requested id.
     *
     * @param id the id originally requested
     * @return the degraded user
     */
    @Override
    public User getUserById(Long id) {
        User degraded = new User();
        degraded.setId(id);
        degraded.setName("服务降级");
        return degraded;
    }
}
/**
 * Alternative to a plain fallback class: a {@code FallbackFactory} receives
 * the cause of the failure, so the fallback can report it.
 */
@Component
public class UserServiceFallbackFactory implements FallbackFactory<UserServiceClient> {

    /**
     * Creates a fallback client bound to the given failure cause.
     *
     * @param cause the failure that triggered the fallback
     * @return a client that prints the cause's message and returns a
     *         placeholder user
     */
    @Override
    public UserServiceClient create(Throwable cause) {
        // UserServiceClient declares a single method, so a lambda can implement it.
        return id -> {
            System.out.println("降级原因: " + cause.getMessage());
            User degraded = new User();
            degraded.setId(id);
            degraded.setName("服务降级");
            return degraded;
        };
    }
}
/**
 * Strategy 1: return a default value.
 *
 * @param id the id originally requested
 * @return a default user built with that id
 */
public User getUserFallback(Long id) {
    User defaultUser = User.builder()
        .id(id)
        .name("默认用户")
        .build();
    return defaultUser;
}
/**
 * Strategy 2: return cached data.
 *
 * @param id the id originally requested
 * @return whatever {@code cacheService} holds under the key {@code "user:" + id}
 */
public User getUserFallback(Long id) {
    String cacheKey = "user:" + id;
    return cacheService.get(cacheKey);
}
/**
 * Strategy 3: call a backup service.
 *
 * @param id the id originally requested
 * @return the user returned by {@code backupUserService}
 */
public User getUserFallback(Long id) {
    User fromBackup = backupUserService.getUserById(id);
    return fromBackup;
}
/**
 * Strategy 4: return an empty object.
 *
 * @return an empty product list
 */
public List<Product> getProductsFallback() {
    List<Product> noProducts = Collections.emptyList();
    return noProducts;
}
/**
 * Strategy 5: throw a business exception instead of returning a value.
 *
 * @param request the original request (unused)
 * @param ex      the failure that triggered the fallback (unused)
 * @return never returns normally
 */
public Order createOrderFallback(OrderRequest request, Exception ex) {
    BusinessException unavailable = new BusinessException("订单服务不可用,请稍后重试");
    throw unavailable;
}
Q: Sentinel、Hystrix、Resilience4j 如何选择?
A: Sentinel 功能最全面(推荐),Resilience4j 轻量级且是官方推荐,Hystrix 已停止维护但仍可用。
Q: 熔断和降级有什么区别?
A: 熔断是自动切断请求,降级是提供备用方案。熔断是手段,降级是结果。熔断后通常会触发降级。
熔断降级是微服务容错的核心机制。通过本课学习,你应该掌握: