【Spring AI实战】第10章 多大模型统一适配与动态切换
1. 统一模型调用接口:兼容百度文心、阿里通义、讯飞星火、OpenAI

我来介绍 Spring AI 的统一模型调用接口设计,以及如何兼容多个主流大模型。

Spring AI 核心概念

Spring AI 提供了统一的抽象接口来调用各种 AI 模型。

核心接口

// 核心聊天接口
public interface ChatClient {
    ChatResponse call(Prompt prompt);
}

// 流式聊天接口
public interface StreamingChatClient {
    Flux<ChatResponse> stream(Prompt prompt);
}

通用消息结构

public class Prompt {
    private List<Message> messages; // 包含系统指令、用户消息、AI回复等
}

public interface Message {
    String getContent();
    MessageType getMessageType();
    Map<String, Object> getMetadata();
}

多模型兼容实现

2.1 配置文件示例

spring:
  ai:
    # OpenAI
    openai:
      api-key: ${OPENAI_API_KEY}
      chat:
        model: gpt-4
        temperature: 0.7
    # 百度文心
    baidu-qianfan:
      api-key: ${BAIDU_API_KEY}
      secret-key: ${BAIDU_SECRET_KEY}
      chat:
        model: ERNIE-Bot-4
        temperature: 0.7
    # 阿里通义
    alibaba-dashscope:
      api-key: ${ALIBABA_API_KEY}
      chat:
        model: qwen-max
        temperature: 0.7
    # 讯飞星火
    iflytek-spark:
      app-id: ${IFLYTEK_APP_ID}
      api-key: ${IFLYTEK_API_KEY}
      api-secret: ${IFLYTEK_API_SECRET}
      chat:
        model: spark-3.0
        temperature: 0.7

2.2 统一调用服务

@Service
public class UnifiedAIService {

    @Value("${spring.ai.provider:openai}")
    private String aiProvider;

    private final Map<String, ChatClient> chatClients;
    private final Map<String, StreamingChatClient> streamingClients;

    public UnifiedAIService(
            ChatClient openAiChatClient,
            StreamingChatClient openAiStreamingChatClient,
            ChatClient baiduChatClient,
            StreamingChatClient baiduStreamingChatClient,
            ChatClient alibabaChatClient,
            StreamingChatClient alibabaStreamingChatClient,
            ChatClient iflytekChatClient,
            StreamingChatClient iflytekStreamingChatClient) {
        chatClients = Map.of(
            "openai", openAiChatClient,
            "baidu", baiduChatClient,
            "alibaba", alibabaChatClient,
            "iflytek", iflytekChatClient
        );
        streamingClients = Map.of(
            "openai", openAiStreamingChatClient,
            "baidu", baiduStreamingChatClient,
            "alibaba", alibabaStreamingChatClient,
            "iflytek", iflytekStreamingChatClient
        );
    }

    public String chat(String message) {
        ChatClient client = chatClients.get(aiProvider);
        Prompt prompt = new Prompt(new UserMessage(message));
        ChatResponse response = client.call(prompt);
        return
            response.getResult().getOutput().getContent();
    }

    public Flux<String> streamChat(String message) {
        StreamingChatClient client = streamingClients.get(aiProvider);
        Prompt prompt = new Prompt(new UserMessage(message));
        return client.stream(prompt)
            .map(response -> response.getResult().getOutput().getContent());
    }

    // 动态切换提供商
    public String chatWithProvider(String provider, String message) {
        ChatClient client = chatClients.get(provider);
        Prompt prompt = new Prompt(new UserMessage(message));
        ChatResponse response = client.call(prompt);
        return response.getResult().getOutput().getContent();
    }
}

2.3 工厂模式实现

@Component
public class AIClientFactory {

    @Autowired
    private ChatClient openAiChatClient;

    @Autowired
    private ChatClient baiduChatClient;

    @Autowired
    private ChatClient alibabaChatClient;

    @Autowired
    private ChatClient iflytekChatClient;

    public ChatClient getChatClient(AIProvider provider) {
        return switch (provider) {
            case OPENAI -> openAiChatClient;
            case BAIDU -> baiduChatClient;
            case ALIBABA -> alibabaChatClient;
            case IFLYTEK -> iflytekChatClient;
            default -> throw new IllegalArgumentException("Unsupported provider");
        };
    }

    public enum AIProvider {
        OPENAI, BAIDU, ALIBABA, IFLYTEK
    }
}

高级功能封装

3.1 统一配置类

@Configuration
public class AIConfig {

    @Bean
    @ConditionalOnProperty(name = "spring.ai.provider", havingValue = "openai")
    public ChatClient openAiChatClient(OpenAiChatOptions options) {
        return new OpenAiChatClient(options);
    }

    @Bean
    @ConditionalOnProperty(name = "spring.ai.provider", havingValue = "baidu")
    public ChatClient baiduChatClient(BaiduQianfanChatOptions options) {
        return new BaiduQianfanChatClient(options);
    }

    @Bean
    @ConditionalOnProperty(name = "spring.ai.provider", havingValue = "alibaba")
    public ChatClient alibabaChatClient(AlibabaDashScopeChatOptions options) {
        return new AlibabaDashScopeChatClient(options);
    }

    @Bean
    @ConditionalOnProperty(name = "spring.ai.provider", havingValue = "iflytek")
    public ChatClient iflytekChatClient(IflytekSparkChatOptions options) {
        return new IflytekSparkChatClient(options);
    }
}

3.2 负载均衡和降级

@Service
public class
LoadBalancedAIService {

    private final List<ChatClient> clients;
    private final AtomicInteger counter = new AtomicInteger(0);

    public LoadBalancedAIService(
            @Autowired(required = false) ChatClient openAiChatClient,
            @Autowired(required = false) ChatClient baiduChatClient,
            @Autowired(required = false) ChatClient alibabaChatClient,
            @Autowired(required = false) ChatClient iflytekChatClient) {
        clients = new ArrayList<>();
        if (openAiChatClient != null) clients.add(openAiChatClient);
        if (baiduChatClient != null) clients.add(baiduChatClient);
        if (alibabaChatClient != null) clients.add(alibabaChatClient);
        if (iflytekChatClient != null) clients.add(iflytekChatClient);
    }

    public String chatWithFallback(String message) {
        for (ChatClient client : clients) {
            try {
                Prompt prompt = new Prompt(new UserMessage(message));
                ChatResponse response = client.call(prompt);
                return response.getResult().getOutput().getContent();
            } catch (Exception e) {
                // 记录日志,尝试下一个
                continue;
            }
        }
        throw new RuntimeException("All AI services are unavailable");
    }

    public String roundRobinChat(String message) {
        if (clients.isEmpty()) {
            throw new RuntimeException("No AI clients available");
        }
        int index = counter.getAndIncrement() % clients.size();
        ChatClient client = clients.get(index);
        Prompt prompt = new Prompt(new UserMessage(message));
        ChatResponse response = client.call(prompt);
        return response.getResult().getOutput().getContent();
    }
}

3.3 统一异常处理

@RestControllerAdvice
public class AIExceptionHandler {

    @ExceptionHandler(ApiException.class)
    public ResponseEntity<ErrorResponse> handleAIException(ApiException e) {
        ErrorResponse error = new ErrorResponse(
            "AI_SERVICE_ERROR",
            e.getMessage(),
            LocalDateTime.now()
        );
        return ResponseEntity.status(500).body(error);
    }

    @ExceptionHandler(RateLimitException.class)
    public ResponseEntity<ErrorResponse> handleRateLimit(RateLimitException e) {
        ErrorResponse error = new ErrorResponse(
            "RATE_LIMIT_EXCEEDED",
            "API rate limit exceeded, please try again later",
            LocalDateTime.now()
        );
        return ResponseEntity.status(429).body(error);
    }
}

使用示例

4.1 REST
API 接口

@RestController
@RequestMapping("/api/ai")
public class AIController {

    @Autowired
    private UnifiedAIService aiService;

    @PostMapping("/chat")
    public ResponseEntity<ChatResponse> chat(@RequestBody ChatRequest request) {
        String response = aiService.chat(request.getMessage());
        return ResponseEntity.ok(new ChatResponse(response));
    }

    @PostMapping("/chat/stream")
    public Flux<ServerSentEvent<String>> streamChat(@RequestBody ChatRequest request) {
        return aiService.streamChat(request.getMessage())
            .map(content -> ServerSentEvent.builder(content).build());
    }

    @PostMapping("/chat/{provider}")
    public ResponseEntity<ChatResponse> chatWithProvider(
            @PathVariable String provider,
            @RequestBody ChatRequest request) {
        String response = aiService.chatWithProvider(provider, request.getMessage());
        return ResponseEntity.ok(new ChatResponse(response));
    }
}

4.2 DTO 对象

@Data
public class ChatRequest {
    private String message;
    private String model;       // 可选,指定模型
    private Double temperature; // 可选
    private Integer maxTokens;  // 可选
}

@Data
@AllArgsConstructor
public class ChatResponse {
    private String content;
    private String model;
    private LocalDateTime timestamp;

    public ChatResponse(String content) {
        this.content = content;
        this.model = "default";
        this.timestamp = LocalDateTime.now();
    }
}

最佳实践建议

- 配置管理:使用 Spring Cloud Config 或 Apollo 管理不同环境的配置
- 监控指标:集成 Micrometer 收集调用延迟、成功率等指标
- 缓存策略:对常见问题结果进行缓存
- 限流熔断:使用 Resilience4j 实现限流和熔断
- 异步处理:对耗时请求使用异步处理
- 成本控制:记录各模型使用量,控制成本

这样设计的好处是:

- 统一接口:业务代码无需关心底层模型实现
- 灵活切换:可动态切换不同 AI 提供商
- 易于扩展:新增模型只需添加配置和客户端
- 容错能力强:支持降级和负载均衡
- 维护方便:集中管理所有 AI 相关配置

2. 动态模型路由:根据业务场景自动切换模型

我来详细介绍一下 Spring AI 中的动态模型路由方案,以及如何根据业务场景自动切换模型。

Spring AI 模型路由架构

核心组件

// 1. 模型提供者接口
public interface ModelProvider {
    String generate(String prompt);
    boolean supports(ModelType type);
}

// 2.
路由策略接口
public interface ModelRouter {
    ModelProvider route(String scenario, Map<String, Object> context);
}

基于场景的路由实现

2.1 配置类定义

@Configuration
@EnableConfigurationProperties(ModelRoutingProperties.class)
public class ModelRoutingConfiguration {

    @Bean
    public ModelRouter modelRouter(
        List<ModelProvider> providers,
        ModelRoutingProperties properties
    ) {
        return new ScenarioBasedModelRouter(providers, properties);
    }

    @Bean
    @ConditionalOnMissingBean
    public AiClient aiClient(ModelRouter router) {
        return new RoutingAiClient(router);
    }
}

2.2 路由属性配置

spring:
  ai:
    routing:
      enabled: true
      default-model: gpt-3.5-turbo
      scenarios:
        creative-writing:
          model: gpt-4
          temperature: 0.9
          max-tokens: 2000
        code-generation:
          model: claude-3-opus
          temperature: 0.2
          max-tokens: 4000

@Component
public class ScenarioBasedModelRouter implements ModelRouter {

    private final Map<String, ModelProvider> providerMap;
    private final ModelRoutingProperties properties;
    private final ModelProvider defaultProvider;

    @Override
    public ModelProvider route(String scenario, Map<String, Object> context) {
        // 1. 检查场景配置
        ModelRoutingProperties.ScenarioConfig config =
            properties.getScenarios().get(scenario);
        if (config != null) {
            return selectByScenario(config, context);
        }
        // 2. 智能路由
        return intelligentRoute(context);
    }

    private ModelProvider selectByScenario(
        ScenarioConfig config,
        Map<String, Object> context
    ) {
        // 基于场景规则选择
        if ("high-accuracy".equals(config.getPriority())) {
            return providerMap.get(config.getModel());
        }
        // 考虑成本因素
        Double budget = (Double) context.get("budget");
        if (budget != null && budget < config.getMaxCost()) {
            return getCostEffectiveProvider();
        }
        // 考虑响应时间
        Integer timeout = (Integer) context.get("timeout");
        if (timeout !=
                null && timeout < 5000) {
            return getFastProvider();
        }
        return providerMap.get(config.getModel());
    }

    private ModelProvider intelligentRoute(Map<String, Object> context) {
        // 基于内容分析的路由
        String content = (String) context.get("content");
        ModelType type = analyzeContentType(content);
        return providers.stream()
            .filter(p -> p.supports(type))
            .findFirst()
            .orElse(defaultProvider);
    }
}

动态路由策略

3.1 基于内容类型的路由

@Component
public class ContentBasedRouter {

    public ModelProvider routeByContent(String content) {
        ContentType type = analyzeContent(content);
        switch (type) {
            case CODE:
                return codeModelProvider();
            case CREATIVE:
                return creativeModelProvider();
            case TECHNICAL:
                return technicalModelProvider();
            case MULTIMODAL:
                return multimodalModelProvider();
            default:
                return defaultModelProvider();
        }
    }

    private ContentType analyzeContent(String content) {
        // 使用规则或ML模型分析内容类型
        if (containsCode(content)) return ContentType.CODE;
        if (containsCreativeMarkers(content)) return ContentType.CREATIVE;
        if (containsTechnicalTerms(content)) return ContentType.TECHNICAL;
        if (containsMultimediaRefs(content)) return ContentType.MULTIMODAL;
        return ContentType.GENERAL;
    }
}

3.2 基于性能指标的路由

@Component
@RefreshScope
public class PerformanceBasedRouter {

    @Autowired
    private ModelMetricsCollector metricsCollector;

    public ModelProvider routeByPerformance(String scenario) {
        List<ModelPerformance> performances =
            metricsCollector.getRecentPerformance(scenario);
        return performances.stream()
            .min(Comparator.comparingDouble(p ->
                calculateScore(p.getLatency(), p.getCost(), p.getAccuracy())
            ))
            .map(ModelPerformance::getProvider)
            .orElse(defaultProvider);
    }

    private double calculateScore(double latency, double cost, double accuracy) {
        // 加权评分算法
        return latency * 0.3 + cost * 0.4 + (1 - accuracy) * 0.3;
    }
}

注解驱动的路由

4.1 定义路由注解

@Target({ElementType.METHOD, ElementType.TYPE})
@Retention(RetentionPolicy.RUNTIME)
public @interface ModelRoute {
    String scenario() default "";
    ModelType type() default ModelType.GENERAL;
    double maxCost() default 0.1;
    int timeout() default 30000;
}

4.2 AOP 路由拦截器

@Aspect
@Component
public class ModelRoutingAspect {

    @Autowired
    private ModelRouter modelRouter;

    @Around("@annotation(modelRoute)")
    public Object routeModel(ProceedingJoinPoint joinPoint, ModelRoute modelRoute) {
        // 获取路由上下文
        Map<String, Object> context = buildContext(joinPoint, modelRoute);
        // 路由到合适的模型
        ModelProvider provider = modelRouter.route(
            modelRoute.scenario(),
            context
        );
        // 执行模型调用
        return executeWithProvider(joinPoint, provider);
    }

    private Map<String, Object> buildContext(
        ProceedingJoinPoint joinPoint,
        ModelRoute modelRoute
    ) {
        Map<String, Object> context = new HashMap<>();
        context.put("scenario", modelRoute.scenario());
        context.put("type", modelRoute.type());
        context.put("maxCost", modelRoute.maxCost());
        context.put("timeout", modelRoute.timeout());
        // 添加方法参数
        Object[] args = joinPoint.getArgs();
        if (args.length > 0 && args[0] instanceof String) {
            context.put("content", args[0]);
        }
        return context;
    }
}

4.3 使用示例

@Service
public class ContentService {

    @ModelRoute(scenario = "creative-writing", type = ModelType.CREATIVE)
    public String generateStory(String prompt) {
        // 自动路由到创意写作模型
        return aiClient.generate(prompt);
    }

    @ModelRoute(scenario = "code-generation", maxCost = 0.05)
    public String generateCode(String requirement) {
        // 自动路由到代码生成模型
        return aiClient.generate(requirement);
    }
}

动态配置更新

5.1 配置热更新

@Component
@RefreshScope
public class DynamicRoutingManager {

    @Autowired
    private ModelRouter modelRouter;

    @EventListener
    public void onConfigUpdate(EnvironmentChangeEvent event) {
        if (event.getKeys().stream()
                .anyMatch(k -> k.startsWith("spring.ai.routing"))) {
            refreshRoutingRules();
        }
    }

    @Scheduled(fixedDelay = 60000) // 每分钟更新一次
    public void updateRoutingBasedOnMetrics() {
        // 基于实时指标更新路由策略
        updatePerformanceBasedRouting();
    }
}

5.2 路由规则配置

@ConfigurationProperties("spring.ai.routing.rules")
@Data
public class RoutingRules {

    private List<RoutingRule> rules = new ArrayList<>();

    @Data
    public static class RoutingRule {
        private String scenario;
        private String condition;
        private String model;
        private Map<String, Object> parameters;
    }
}

完整的服务示例

6.1
路由服务

@Service
public class SmartModelRoutingService {

    @Autowired
    private ModelRouter modelRouter;

    @Autowired
    private ModelMetricsService metricsService;

    @Autowired
    private CostCalculator costCalculator;

    public CompletableFuture<String> generate(
        String prompt,
        String scenario,
        RoutingContext context
    ) {
        return CompletableFuture.supplyAsync(() -> {
            // 1. 路由到合适的模型
            ModelProvider provider = modelRouter.route(scenario, context.toMap());

            // 2. 执行生成
            long start = System.currentTimeMillis();
            String result = provider.generate(prompt);
            long latency = System.currentTimeMillis() - start;

            // 3. 记录指标
            metricsService.recordCall(
                scenario,
                provider.getName(),
                latency,
                result.length()
            );

            // 4. 计算成本
            double cost = costCalculator.calculate(
                provider.getName(),
                prompt.length(),
                result.length()
            );
            metricsService.recordCost(scenario, cost);

            return result;
        });
    }
}

6.2 客户端封装

@Component
public class RoutingAiClient {

    @Autowired
    private SmartModelRoutingService routingService;

    public String generate(String prompt) {
        return generate(prompt, "default", new RoutingContext());
    }

    public String generate(String prompt, String scenario) {
        return generate(prompt, scenario, new RoutingContext());
    }

    public String generate(String prompt, String scenario, RoutingContext context) {
        // 自动检测场景
        if ("auto".equals(scenario)) {
            scenario = detectScenario(prompt);
        }
        return routingService.generate(prompt, scenario, context).join();
    }

    private String detectScenario(String prompt) {
        // 使用简单规则或ML模型检测场景
        if (prompt.contains("写一个") || prompt.contains("创作")) {
            return "creative-writing";
        } else if (prompt.contains("代码") || prompt.contains("编程")) {
            return "code-generation";
        } else if (prompt.contains("分析") || prompt.contains("总结")) {
            return "data-analysis";
        }
        return "general";
    }
}

监控和熔断

7.1 熔断器配置

@Configuration
public class CircuitBreakerConfig {

    @Bean
    public CircuitBreakerFactory circuitBreakerFactory() {
        return new Resilience4JCircuitBreakerFactory();
    }

    @Bean
    public ModelProvider fallbackProvider() {
        return new FallbackModelProvider();
    }
}

7.2 带熔断的路由

@Service
@CircuitBreaker(name =
        "modelService", fallbackMethod = "fallback")
public class ResilientModelService {

    @Autowired
    private ModelRouter modelRouter;

    public String generateWithCircuitBreaker(String prompt, String scenario) {
        ModelProvider provider = modelRouter.route(scenario, Map.of());
        return provider.generate(prompt);
    }

    public String fallback(String prompt, String scenario, Throwable t) {
        log.warn("Fallback triggered for scenario: {}", scenario, t);
        return "服务暂时不可用,请稍后重试";
    }
}

总结

Spring AI 动态模型路由的关键点:

- 多层路由策略:基于场景、内容、性能的多维度路由
- 注解驱动:使用注解声明路由规则,减少侵入性
- 动态配置:支持热更新路由配置
- 智能决策:基于历史数据和实时指标优化路由
- 容错机制:熔断、降级、重试等保障可用性
- 成本控制:根据预算自动选择性价比最优的模型

这种架构可以实现:

- 根据业务场景自动选择最合适的模型
- 动态调整路由策略
- 成本控制和性能优化
- 高可用性和容错能力

3. 模型降级、熔断、容错机制:企业高可用方案

Spring AI 的高可用性方案需要结合降级、熔断、容错机制来确保企业级应用的稳定性。以下是完整的实现方案。

一、核心架构设计

多模型降级策略

@Component
public class ModelDegradationManager {

    @Autowired
    private List<AiModelProvider> modelProviders; // 按优先级排序

    public String executeWithDegradation(String prompt) {
        for (AiModelProvider provider : modelProviders) {
            try {
                return provider.generate(prompt);
            } catch (Exception e) {
                log.warn("Provider {} failed, trying next", provider.getName());
                // 记录失败指标
                Metrics.recordFailure(provider.getName());
            }
        }
        throw new AllModelsFailedException("All AI models unavailable");
    }
}

二、熔断器实现(Resilience4j 集成)

# application.yml
resilience4j:
  circuitbreaker:
    instances:
      openai-primary:
        failure-rate-threshold: 50
        wait-duration-in-open-state: 10s
        permitted-number-of-calls-in-half-open-state: 3
        sliding-window-size: 10
        minimum-number-of-calls: 5
      openai-fallback:
        failure-rate-threshold: 30
        wait-duration-in-open-state: 5s

@Configuration
public class CircuitBreakerConfig {

    @Bean
    public CircuitBreakerRegistry circuitBreakerRegistry() {
        return CircuitBreakerRegistry.ofDefaults();
    }

    @Bean
    @CircuitBreaker(name = "openai-primary", fallbackMethod = "fallbackResponse")
    public String callPrimaryModel(String prompt) {
        return openAiClient.generate(prompt);
    }

    public String fallbackResponse(String prompt, Exception e) {
        // 1. 返回缓存结果
        String cached = cacheService.get(prompt);
        if (cached != null) return cached;
        // 2.
调用降级模型
        return fallbackModel.generate(prompt);
        // 3. 返回默认响应
        return "系统繁忙,请稍后重试";
    }
}

三、智能路由与负载均衡

@Component
public class SmartModelRouter {

    @Autowired
    private ModelHealthChecker healthChecker;

    private final WeightedRoundRobinSelector selector = new WeightedRoundRobinSelector();

    public AiModel selectOptimalModel(ModelRequest request) {
        // 1. 基于健康检查
        List<AiModel> healthyModels = healthChecker.getHealthyModels();

        // 2. 基于性能评分
        healthyModels.sort(Comparator
            .comparingDouble(AiModel::getPerformanceScore)
            .reversed());

        // 3. 基于成本控制(企业级)
        if (isCostSensitive(request)) {
            return selectCostEffectiveModel(healthyModels);
        }

        // 4. 基于负载均衡
        return selector.select(healthyModels);
    }
}

四、完整的容错处理链

@Service
public class RobustAIService {

    @Autowired
    private RetryTemplate retryTemplate;

    @Autowired
    private Bulkhead bulkhead;

    @Autowired
    private RateLimiter rateLimiter;

    @Autowired
    private CacheService cacheService;

    public AiResponse executeWithFaultTolerance(AiRequest request) {
        return retryTemplate.execute(context -> {
            try {
                // 1. 检查缓存
                AiResponse cached = cacheService.get(request);
                if (cached != null) return cached;

                // 2. 限流保护
                rateLimiter.acquirePermission();

                // 3. 舱壁隔离
                return bulkhead.executeSupplier(() -> {
                    // 4. 智能路由选择模型
                    AiModel model = modelRouter.select(request);

                    // 5. 执行请求(带超时控制)
                    return CompletableFuture.supplyAsync(() ->
                            model.generate(request)
                        ).orTimeout(30, TimeUnit.SECONDS)
                        .exceptionally(ex -> {
                            // 6. 异常处理
                            return handleException(ex, request);
                        }).join();
                });
            } catch (Exception e) {
                // 7.
最终降级
                return ultimateFallback(request);
            }
        });
    }

    private AiResponse handleException(Exception ex, AiRequest request) {
        if (ex instanceof RateLimitException) {
            // 限流异常:排队或拒绝
            return queueRequest(request);
        } else if (ex instanceof TimeoutException) {
            // 超时:快速失败
            Metrics.recordTimeout(request.getModel());
            throw new FastFailException("Request timeout");
        } else if (ex instanceof ModelOverloadedException) {
            // 模型过载:切换实例
            return switchModelInstance(request);
        }
        throw new RuntimeException(ex);
    }
}

五、监控与告警配置

@Configuration
@EnableScheduling
public class MonitoringConfig {

    @Scheduled(fixedDelay = 60000)
    public void monitorModelHealth() {
        modelProviders.forEach(provider -> {
            HealthStatus status = checkHealth(provider);

            // 1. 记录指标
            Metrics.recordLatency(provider.getName(), status.getLatency());
            Metrics.recordSuccessRate(provider.getName(), status.getSuccessRate());

            // 2. 触发告警
            if (status.getSuccessRate() < 95) {
                alertService.sendAlert(
                    "AI_MODEL_DEGRADED",
                    provider.getName(),
                    status
                );
            }

            // 3. 自动调整权重
            loadBalancer.adjustWeight(
                provider.getName(),
                calculateWeight(status)
            );
        });
    }

    @EventListener
    public void onCircuitBreakerEvent(CircuitBreakerOnStateTransitionEvent event) {
        // 熔断状态变更通知
        notificationService.notify(
            "CircuitBreaker " + event.getCircuitBreakerName()
                + " changed to " + event.getStateTransition()
        );
    }
}

六、配置管理

# application-high-availability.yml
spring:
  ai:
    high-availability:
      enabled: true
      strategies:
        degradation:
          enabled: true
          chain: [gpt-4, gpt-3.5, claude-2, ernie-bot]
          fallback-response: 服务暂时不可用
        circuit-breaker:
          enabled: true
          failure-threshold: 50%
          timeout-ms: 30000
        retry:
          max-attempts: 3
          backoff-delay: 1000
        bulkhead:
          max-concurrent-calls: 50
          max-wait-duration: 1000
      monitoring:
        metrics:
          enabled: true
          export:
            prometheus:
              enabled: true
        alert:
          webhooks:
            - url: ${ALERT_WEBHOOK_URL}
          thresholds:
            error-rate: 5%
            latency-p99: 5000ms

七、最佳实践建议

1. 分级降级策略

- 一级降级:主模型 → 备用模型
- 二级降级:复杂模型 → 简化模型
- 三级降级:AI服务 → 规则引擎
- 四级降级:返回缓存或默认响应

2.
熔断恢复策略

// 渐进式恢复
@CircuitBreaker(name = "model-service",
    fallbackMethod = "fallback",
    successThreshold = 3,          // 连续成功3次才关闭熔断
    recoveryStrategy = "gradual"   // 逐步增加流量
)

3. 多区域容灾

@Primary
@Bean(name = "usEastModel")
public AiModel usEastModel() {
    return new OpenAiModel("us-east-api.openai.com");
}

@Bean(name = "euWestModel")
@ConditionalOnProperty(name = "region", havingValue = "eu")
public AiModel euWestModel() {
    return new OpenAiModel("eu-west-api.openai.com");
}

4. 测试策略

@SpringBootTest
@TestPropertySource(properties = {
    "spring.ai.high-availability.test-mode=true",
    "spring.ai.circuit-breaker.force-open=true"
})
public class FaultToleranceTest {

    @Test
    public void testCircuitBreaker() {
        // 模拟失败触发熔断
        mockServer.setFailureRate(100);

        // 验证降级逻辑
        assertThat(service.execute(request))
            .isEqualTo(fallbackResponse);
    }
}

这个方案提供了从客户端到服务端的完整高可用保障,可以根据具体业务需求进行调整和扩展。