Spring Boot-compatible Ollama integration providing ChatModel and EmbeddingModel implementations for running large language models locally with support for streaming, tool calling, model management, and observability.
Handle errors gracefully and implement robust retry logic for Ollama operations.
Spring AI Ollama provides built-in error handling and retry mechanisms through Spring Retry integration. Handle network failures, timeouts, rate limits, and model unavailability with configurable retry strategies.
Connection failures, timeouts, DNS issues:
try {
ChatResponse response = chatModel.call(new Prompt("Hello"));
} catch (ResourceAccessException e) {
// Network error - connection failed, timeout, etc.
logger.error("Network error communicating with Ollama", e);
}

Requested model doesn't exist locally:
try {
OllamaChatOptions options = OllamaChatOptions.builder()
.model("nonexistent-model")
.build();
ChatResponse response = chatModel.call(new Prompt("Hello", options));
} catch (HttpClientErrorException e) {
if (e.getStatusCode() == HttpStatus.NOT_FOUND) {
logger.error("Model not found", e);
// Pull the model or use a different one
}
}

Invalid parameters or request format:
try {
// Invalid configuration
OllamaChatOptions options = OllamaChatOptions.builder()
.temperature(-5.0) // Invalid temperature
.build();
ChatResponse response = chatModel.call(new Prompt("Hello", options));
} catch (HttpClientErrorException.BadRequest e) {
logger.error("Invalid request parameters", e);
}

Ollama server errors (500, 503, etc.):
try {
ChatResponse response = chatModel.call(new Prompt("Hello"));
} catch (HttpServerErrorException e) {
logger.error("Ollama server error", e);
// Retry or fallback
}

Spring AI Ollama uses RetryUtils.DEFAULT_RETRY_TEMPLATE by default:
// Default configuration:
// - Max 10 attempts
// - Exponential backoff starting at 2 seconds
// - Retries on RestClientException and TransientAiException
OllamaChatModel chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3)
.build())
// Uses default retry template automatically
.build();

Create custom retry logic:
import org.springframework.retry.support.RetryTemplate;
import org.springframework.retry.policy.SimpleRetryPolicy;
import org.springframework.retry.backoff.ExponentialBackOffPolicy;
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(5) // Retry up to 5 times
.exponentialBackoff(1000, 2.0, 10000) // 1s initial, 2x multiplier, 10s max
.retryOn(ResourceAccessException.class) // Network errors
.retryOn(HttpServerErrorException.class) // Server errors
.build();
OllamaChatModel chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(OllamaChatOptions.builder()
.model(OllamaModel.LLAMA3)
.build())
.retryTemplate(retryTemplate) // Custom retry logic
.build();

Retry with fixed delays:
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(3)
.fixedBackoff(2000) // 2 seconds between attempts
.retryOn(TransientAiException.class)
.build();

Disable retries entirely:
import org.springframework.retry.support.RetryTemplate;
RetryTemplate noRetry = new RetryTemplate();
noRetry.setRetryPolicy(new NeverRetryPolicy());
OllamaChatModel chatModel = OllamaChatModel.builder()
.ollamaApi(ollamaApi)
.defaultOptions(options)
.retryTemplate(noRetry)
.build();

Monitor retry attempts with listeners:
import org.springframework.retry.RetryListener;
import org.springframework.retry.RetryContext;
import org.springframework.retry.RetryCallback;
/**
 * RetryListener that logs the lifecycle of retryable operations: a success
 * after at least one retry, every failed attempt, and final exhaustion.
 */
public class LoggingRetryListener implements RetryListener {

    private static final Logger logger = LoggerFactory.getLogger(LoggingRetryListener.class);

    @Override
    public <T, E extends Throwable> void onSuccess(
            RetryContext context,
            RetryCallback<T, E> callback,
            T result) {
        int attempts = context.getRetryCount();
        // Stay quiet for first-try successes; only report actual recoveries.
        if (attempts == 0) {
            return;
        }
        logger.info("Retry succeeded after {} attempts", attempts);
    }

    @Override
    public <T, E extends Throwable> void onError(
            RetryContext context,
            RetryCallback<T, E> callback,
            Throwable throwable) {
        // Called once per failed attempt, before the backoff sleep.
        int attempts = context.getRetryCount();
        String reason = throwable.getMessage();
        logger.warn("Retry attempt {} failed: {}",
            attempts,
            reason);
    }

    @Override
    public <T, E extends Throwable> void close(
            RetryContext context,
            RetryCallback<T, E> callback,
            Throwable throwable) {
        // A non-null throwable here means every attempt failed.
        if (throwable == null) {
            return;
        }
        logger.error("All retry attempts exhausted", throwable);
    }
}
// Register listener
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(5)
.exponentialBackoff(1000, 2.0, 10000)
.withListener(new LoggingRetryListener())
.build();

@Service
/**
 * Chat service with layered resilience: the primary model retries transient
 * failures via Spring Retry, and any remaining failure falls back to a
 * smaller, faster model.
 */
public class ResilientChatService {

    // Fix: the original example used `logger` in chat(String) without declaring it.
    private static final Logger logger = LoggerFactory.getLogger(ResilientChatService.class);

    private final OllamaChatModel chatModel;
    private final OllamaChatModel fallbackModel;

    public ResilientChatService(OllamaApi ollamaApi) {
        // Primary model: up to 3 attempts, exponential backoff 1s -> 10s cap,
        // retrying only TransientAiException; attempts are logged by the listener.
        RetryTemplate retryTemplate = RetryTemplate.builder()
            .maxAttempts(3)
            .exponentialBackoff(Duration.ofSeconds(1), 2.0, Duration.ofSeconds(10))
            .retryOn(TransientAiException.class)
            .withListener(new LoggingRetryListener())
            .build();
        this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3)
                .build())
            .retryTemplate(retryTemplate)
            .build();
        // Fallback model (smaller, faster) shares the same retry policy.
        this.fallbackModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.QWEN_3_06B) // Smaller model
                .build())
            .retryTemplate(retryTemplate)
            .build();
    }

    /**
     * Answers with the primary model; if it fails (after its retries are
     * exhausted), answers with the fallback model instead.
     */
    public String chat(String message) {
        try {
            return chatWithPrimary(message);
        } catch (Exception e) {
            logger.warn("Primary model failed, using fallback", e);
            return chatWithFallback(message);
        }
    }

    private String chatWithPrimary(String message) {
        ChatResponse response = chatModel.call(new Prompt(message));
        return response.getResult().getOutput().getText();
    }

    private String chatWithFallback(String message) {
        ChatResponse response = fallbackModel.call(new Prompt(message));
        return response.getResult().getOutput().getText();
    }
}

@Service
/**
 * Chat service combining Spring Retry with a circuit breaker; failures
 * degrade gracefully to an empty Optional (or a caller-supplied default)
 * instead of propagating exceptions.
 */
public class GracefulChatService {

    // Fix: the original example used `logger` in chat(String) without declaring it.
    private static final Logger logger = LoggerFactory.getLogger(GracefulChatService.class);

    private final OllamaChatModel chatModel;
    private final CircuitBreaker circuitBreaker;

    public GracefulChatService(OllamaApi ollamaApi) {
        // Up to 3 attempts with a fixed 2-second pause between them,
        // retrying only transient AI errors.
        RetryTemplate retryTemplate = RetryTemplate.builder()
            .maxAttempts(3)
            .fixedBackoff(Duration.ofSeconds(2))
            .retryOn(TransientAiException.class)
            .build();
        this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3)
                .build())
            .retryTemplate(retryTemplate)
            .build();
        // Circuit breaker to prevent cascading failures when Ollama is down.
        this.circuitBreaker = CircuitBreaker.ofDefaults("ollama-chat");
    }

    /**
     * Sends the message through the circuit breaker; returns empty on any
     * failure rather than throwing.
     */
    public Optional<String> chat(String message) {
        try {
            String response = circuitBreaker.executeSupplier(() -> {
                ChatResponse chatResponse = chatModel.call(new Prompt(message));
                return chatResponse.getResult().getOutput().getText();
            });
            return Optional.of(response);
        } catch (Exception e) {
            logger.error("Chat failed after retries and circuit breaker", e);
            return Optional.empty(); // Graceful degradation
        }
    }

    /** Same as {@link #chat} but substitutes a default answer on failure. */
    public String chatWithFallback(String message, String defaultResponse) {
        return chat(message).orElse(defaultResponse);
    }
}

@Service
/**
 * Chat service that bounds each request with a caller-supplied timeout by
 * running the blocking call on a worker thread.
 */
public class TimeoutAwareChatService {

    // Fix: the original example used `logger` without declaring it.
    private static final Logger logger = LoggerFactory.getLogger(TimeoutAwareChatService.class);

    private final OllamaChatModel chatModel;
    private final ExecutorService executor;

    public TimeoutAwareChatService(OllamaApi ollamaApi) {
        this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3)
                .build())
            .build();
        this.executor = Executors.newCachedThreadPool();
    }

    /**
     * Calls the model, failing with TimeoutException if no answer arrives
     * within {@code timeout}. Other failures surface as RuntimeException.
     */
    public String chatWithTimeout(String message, Duration timeout)
            throws TimeoutException {
        Future<String> future = executor.submit(() -> {
            ChatResponse response = chatModel.call(new Prompt(message));
            return response.getResult().getOutput().getText();
        });
        try {
            return future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
        } catch (TimeoutException e) {
            // Interrupt the worker so the abandoned call does not linger.
            future.cancel(true);
            logger.error("Chat request timed out after {}", timeout);
            throw e;
        } catch (InterruptedException | ExecutionException e) {
            logger.error("Chat request failed", e);
            throw new RuntimeException("Chat failed", e);
        }
    }

    /** Timeout-safe variant: empty Optional instead of TimeoutException. */
    public Optional<String> chatWithTimeoutSafe(String message, Duration timeout) {
        try {
            return Optional.of(chatWithTimeout(message, timeout));
        } catch (TimeoutException e) {
            return Optional.empty();
        }
    }

    /**
     * Releases the worker threads. Fix: the original example never shut the
     * executor down, leaking non-daemon threads on application shutdown.
     */
    public void shutdown() {
        executor.shutdown();
    }
}

@Service
/**
 * Chat service that verifies the model is present locally before calling it,
 * pulling it on demand, and re-pulls once if the model vanishes mid-flight.
 */
public class ModelCheckingChatService {

    // Fix: the original example used `logger` without declaring it.
    private static final Logger logger = LoggerFactory.getLogger(ModelCheckingChatService.class);

    private final OllamaModelManager modelManager;
    private final OllamaChatModel chatModel;
    private final String modelName;

    public ModelCheckingChatService(OllamaApi ollamaApi) {
        this.modelName = OllamaModel.LLAMA3.id();
        // Pull only when missing; large pulls get a generous 10-minute budget.
        this.modelManager = new OllamaModelManager(
            ollamaApi,
            ModelManagementOptions.builder()
                .pullModelStrategy(PullModelStrategy.WHEN_MISSING)
                .timeout(Duration.ofMinutes(10))
                .maxRetries(2)
                .build()
        );
        this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(modelName)
                .build())
            .build();
    }

    /**
     * Answers the message, pulling the model first if needed; on a 404 from
     * the server (model removed behind our back), re-pulls and retries once.
     */
    public String chat(String message) {
        // Ensure model is available
        ensureModelAvailable();
        try {
            ChatResponse response = chatModel.call(new Prompt(message));
            return response.getResult().getOutput().getText();
        } catch (HttpClientErrorException.NotFound e) {
            // Model disappeared, try to recover
            logger.warn("Model not found, attempting to pull", e);
            modelManager.pullModel(modelName, PullModelStrategy.ALWAYS);
            // Retry once after pulling
            ChatResponse response = chatModel.call(new Prompt(message));
            return response.getResult().getOutput().getText();
        }
    }

    /** Pulls the model if the local Ollama instance does not have it. */
    private void ensureModelAvailable() {
        if (!modelManager.isModelAvailable(modelName)) {
            logger.info("Model {} not available, pulling...", modelName);
            modelManager.pullModel(modelName);
        }
    }
}

@Service
/**
 * Processes batches of chat messages, collecting per-message successes and
 * failures instead of aborting the batch on the first error.
 */
public class BatchChatService {

    // Fix: the original example used `logger` without declaring it.
    private static final Logger logger = LoggerFactory.getLogger(BatchChatService.class);

    private final OllamaChatModel chatModel;

    /**
     * Fix: the original example declared a blank-final {@code chatModel} with
     * no constructor, which does not compile; inject the model here.
     */
    public BatchChatService(OllamaChatModel chatModel) {
        this.chatModel = chatModel;
    }

    /**
     * Outcome of a batch run: responses in input order for the messages that
     * succeeded, plus failure reasons keyed by original message index.
     */
    public record BatchResult(
        List<String> successful,
        Map<Integer, String> failed
    ) {}

    /** Processes each message once, pausing 100 ms between requests. */
    public BatchResult processBatch(List<String> messages) {
        List<String> successful = new ArrayList<>();
        Map<Integer, String> failed = new HashMap<>();
        for (int i = 0; i < messages.size(); i++) {
            String message = messages.get(i);
            try {
                String response = chat(message);
                successful.add(response);
                logger.info("Processed message {}/{}", i + 1, messages.size());
            } catch (Exception e) {
                logger.error("Failed to process message {}: {}",
                    i, e.getMessage());
                failed.put(i, e.getMessage());
            }
            // Rate limiting: brief pause between requests (skipped after the last).
            if (i < messages.size() - 1) {
                try {
                    Thread.sleep(100); // 100ms between requests
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            }
        }
        return new BatchResult(successful, failed);
    }

    private String chat(String message) {
        ChatResponse response = chatModel.call(new Prompt(message));
        return response.getResult().getOutput().getText();
    }

    /**
     * Like {@link #processBatch} but retries each message up to
     * {@code maxRetries} times with exponential backoff (1s, 2s, 4s, ...).
     */
    public BatchResult processBatchWithRetry(List<String> messages, int maxRetries) {
        List<String> successful = new ArrayList<>();
        Map<Integer, String> failed = new HashMap<>();
        for (int i = 0; i < messages.size(); i++) {
            String message = messages.get(i);
            boolean success = false;
            for (int attempt = 0; attempt < maxRetries && !success; attempt++) {
                try {
                    String response = chat(message);
                    successful.add(response);
                    success = true;
                } catch (Exception e) {
                    if (attempt == maxRetries - 1) {
                        failed.put(i, e.getMessage());
                        logger.error("Failed after {} attempts: {}",
                            maxRetries, e.getMessage());
                    } else {
                        logger.warn("Attempt {} failed, retrying", attempt + 1);
                        sleep(Duration.ofSeconds((long) Math.pow(2, attempt)));
                    }
                }
            }
        }
        return new BatchResult(successful, failed);
    }

    /** Interruption-aware sleep: restores the interrupt flag and returns. */
    private void sleep(Duration duration) {
        try {
            Thread.sleep(duration.toMillis());
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }
    }
}

try {
ChatResponse response = chatModel.call(new Prompt("Hello"));
} catch (HttpStatusCodeException e) {
HttpStatusCode status = e.getStatusCode();
String responseBody = e.getResponseBodyAsString();
logger.error("HTTP error {}: {}", status.value(), responseBody);
if (status.is4xxClientError()) {
// Client error - check request parameters
handleClientError(e);
} else if (status.is5xxServerError()) {
// Server error - retry or fallback
handleServerError(e);
}
}

import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.http.client.ClientHttpResponse;
/**
 * ResponseErrorHandler that maps Ollama HTTP failures to domain-specific
 * exceptions: 404 -> model missing, 429 -> rate limited, 5xx -> server error,
 * anything else -> client error.
 */
public class CustomOllamaErrorHandler implements ResponseErrorHandler {

    @Override
    public boolean hasError(ClientHttpResponse response) throws IOException {
        return response.getStatusCode().isError();
    }

    @Override
    public void handleError(ClientHttpResponse response) throws IOException {
        HttpStatusCode status = response.getStatusCode();
        // Fix: decode the body with an explicit charset; the original relied on
        // the platform default, which can garble UTF-8 error messages.
        String body = new String(response.getBody().readAllBytes(),
            java.nio.charset.StandardCharsets.UTF_8);
        if (status == HttpStatus.NOT_FOUND) {
            throw new ModelNotFoundException("Model not found: " + body);
        } else if (status == HttpStatus.TOO_MANY_REQUESTS) {
            throw new RateLimitException("Rate limit exceeded: " + body);
        } else if (status.is5xxServerError()) {
            throw new OllamaServerException("Server error: " + body);
        } else {
            throw new OllamaClientException("Client error: " + body);
        }
    }
}
// Apply to OllamaApi
OllamaApi ollamaApi = OllamaApi.builder()
.baseUrl("http://localhost:11434")
.responseErrorHandler(new CustomOllamaErrorHandler())
.build();

| Strategy | Use Case | Configuration |
|---|---|---|
| Exponential Backoff | Most scenarios | Start: 1s, Multiplier: 2.0, Max: 10s |
| Fixed Backoff | Predictable delays | Delay: 2s |
| No Retry | Critical fast-fail | MaxAttempts: 1 |
| Aggressive Retry | Transient errors | MaxAttempts: 10, Start: 500ms |
// Problem: Network instability causing timeouts
// Solution: Retry with exponential backoff
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(5)
.exponentialBackoff(1000, 2.0, 10000)
.retryOn(ResourceAccessException.class)
.build();

// Problem: Model takes time to load
// Solution: Longer initial delay, fewer retries
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(3)
.fixedBackoff(5000) // 5 second delay
.retryOn(HttpServerErrorException.ServiceUnavailable.class)
.build();

// Problem: Too many requests
// Solution: Longer backoff with listener for logging
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(5)
.exponentialBackoff(5000, 2.0, 60000) // Start at 5s, max 60s
.retryOn(HttpClientErrorException.TooManyRequests.class)
.withListener(new LoggingRetryListener())
.build();

@Test
void testRetryOnNetworkError() {
// Mock OllamaApi to throw exception
OllamaApi mockApi = mock(OllamaApi.class);
when(mockApi.chat(any()))
.thenThrow(new ResourceAccessException("Network error"))
.thenThrow(new ResourceAccessException("Network error"))
.thenReturn(validResponse);
RetryTemplate retryTemplate = RetryTemplate.builder()
.maxAttempts(3)
.fixedBackoff(100)
.build();
OllamaChatModel chatModel = OllamaChatModel.builder()
.ollamaApi(mockApi)
.retryTemplate(retryTemplate)
.build();
// Should succeed after 2 retries
ChatResponse response = chatModel.call(new Prompt("Hello"));
assertNotNull(response);
// Verify 3 attempts were made
verify(mockApi, times(3)).chat(any());
}