Edge Cases and Advanced Scenarios

This document covers edge cases, boundary conditions, and advanced usage scenarios for Spring AI Ollama.

Table of Contents

  1. Null and Empty Input Handling
  2. Large Context Window Handling
  3. Concurrent Usage Patterns
  4. Streaming Edge Cases
  5. Tool Calling Edge Cases
  6. Model Management Edge Cases
  7. Error Recovery Patterns
  8. Resource Exhaustion Scenarios
  9. Network Failure Recovery
  10. Timeout Handling
  11. Boundary Conditions
  12. Advanced Scenarios

Null and Empty Input Handling

Empty Prompt

// Empty string prompt
try {
    ChatResponse response = chatModel.call(new Prompt(""));
    // May return empty or error depending on model
} catch (Exception e) {
    // Handle error
}

// Null prompt (throws exception)
try {
    ChatResponse response = chatModel.call(new Prompt((String) null));  // cast disambiguates the overloaded constructors
} catch (IllegalArgumentException e) {
    // Expected: null prompt not allowed
}

Empty Message List

// Empty message list
try {
    Prompt prompt = new Prompt(List.of());
    ChatResponse response = chatModel.call(prompt);
} catch (IllegalArgumentException e) {
    // Expected: at least one message required
}

Null Fields in Options

// Null model (uses default)
OllamaChatOptions options = OllamaChatOptions.builder()
    .model(null)  // Will use default model
    .temperature(0.7)
    .build();

// Null temperature (uses default)
OllamaChatOptions defaultTempOptions = OllamaChatOptions.builder()
    .model("llama3")
    .temperature(null)  // Will use default temperature (0.8)
    .build();

Large Context Window Handling

Exceeding Context Length

// Generate very long prompt
StringBuilder longPrompt = new StringBuilder();
for (int i = 0; i < 10000; i++) {
    longPrompt.append("This is a very long text. ");
}

// Option 1: Enable truncation (default)
OllamaChatOptions options = OllamaChatOptions.builder()
    .model("llama3")
    .truncate(true)  // Auto-truncate to context length
    .build();

ChatResponse response = chatModel.call(new Prompt(longPrompt.toString(), options));

// Option 2: Increase context window
OllamaChatOptions largeContextOptions = OllamaChatOptions.builder()
    .model("llama3")
    .numCtx(8192)  // Increase from default 2048
    .build();

// Option 3: Disable truncation (will error if too long)
OllamaChatOptions strictOptions = OllamaChatOptions.builder()
    .model("llama3")
    .truncate(false)
    .build();

try {
    response = chatModel.call(new Prompt(longPrompt.toString(), strictOptions));
} catch (Exception e) {
    // Handle context length exceeded error
}
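
Another approach, sketched here with the same rough 1 token ≈ 4 characters heuristic that the conversation manager below relies on, is to estimate the prompt size up front and enlarge the context window only when the prompt actually needs it:

// Rough pre-flight estimate: ~4 characters per token
int estimatedPromptTokens = longPrompt.length() / 4;

OllamaChatOptions adaptiveOptions = OllamaChatOptions.builder()
    .model("llama3")
    .numCtx(estimatedPromptTokens > 2048 ? 8192 : 2048)  // grow the window only when needed
    .build();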

Conversation History Management

public class ConversationManager {
    private final int MAX_HISTORY_TOKENS = 3000;
    private final List<Message> history = new ArrayList<>();

    public void addMessage(Message message) {
        history.add(message);
        
        // Estimate tokens (rough: 1 token ≈ 4 characters)
        int estimatedTokens = history.stream()
            .mapToInt(m -> m.getContent().length() / 4)
            .sum();

        // Trim if exceeds limit
        while (estimatedTokens > MAX_HISTORY_TOKENS && history.size() > 2) {
            // Keep system message, remove oldest user/assistant messages
            if (!(history.get(1) instanceof SystemMessage)) {
                history.remove(1);
                estimatedTokens = history.stream()
                    .mapToInt(m -> m.getContent().length() / 4)
                    .sum();
            } else {
                break;
            }
        }
    }

    public List<Message> getHistory() {
        return new ArrayList<>(history);
    }
}
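
A usage sketch for the manager above (the system prompt and user message are illustrative): record each turn and send the trimmed history as the next prompt, so long conversations stay within the token budget.

ConversationManager conversation = new ConversationManager();
conversation.addMessage(new SystemMessage("You are a concise assistant."));

// For each turn: record the user message, send the trimmed history, record the reply
conversation.addMessage(new UserMessage("Summarize the plot of Hamlet."));
ChatResponse response = chatModel.call(new Prompt(conversation.getHistory()));
conversation.addMessage(response.getResult().getOutput());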

Concurrent Usage Patterns

Thread-Safe Shared Instance

@Service
public class ConcurrentChatService {

    // Single shared instance (thread-safe)
    private final OllamaChatModel chatModel;

    public ConcurrentChatService(OllamaApi ollamaApi) {
        this.chatModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3.id())
                .build())
            .build();
    }

    public String chat(String message) {
        // Safe to call from multiple threads
        ChatResponse response = chatModel.call(new Prompt(message));
        return response.getResult().getOutput().getContent();
    }
}

// Usage from multiple threads
ExecutorService executor = Executors.newFixedThreadPool(10);
ConcurrentChatService service = new ConcurrentChatService(ollamaApi);

List<Future<String>> futures = new ArrayList<>();
for (int i = 0; i < 100; i++) {
    final int index = i;
    Future<String> future = executor.submit(() -> 
        service.chat("Question " + index)
    );
    futures.add(future);
}

// Wait for all to complete
for (Future<String> future : futures) {
    String response = future.get();
}

Parallel Batch Processing

public class ParallelBatchProcessor {

    private final OllamaEmbeddingModel embeddingModel;

    public List<float[]> processInParallel(List<String> texts, int parallelism) {
        return texts.parallelStream()
            .map(text -> {
                try {
                    return embeddingModel.embed(text);
                } catch (Exception e) {
                    logger.error("Failed to embed text", e);
                    return null;
                }
            })
            .filter(Objects::nonNull)
            .toList();
    }

    public Flux<float[]> processReactive(List<String> texts, int concurrency) {
        return Flux.fromIterable(texts)
            .flatMap(text ->
                Mono.fromCallable(() -> embeddingModel.embed(text))
                    // embed() is a blocking call, so run it off the event loop
                    .subscribeOn(Schedulers.boundedElastic()),
                concurrency
            );
    }
}

Streaming Edge Cases

Handling Incomplete Streams

public class RobustStreamingHandler {

    public String handleStreamWithTimeout(OllamaChatModel chatModel, String prompt, Duration timeout) {
        StringBuilder result = new StringBuilder();
        CountDownLatch latch = new CountDownLatch(1);
        AtomicReference<Throwable> error = new AtomicReference<>();

        Flux<ChatResponse> stream = chatModel.stream(new Prompt(prompt));

        stream.subscribe(
            chunk -> result.append(chunk.getResult().getOutput().getContent()),
            err -> {
                error.set(err);
                latch.countDown();
            },
            latch::countDown
        );

        try {
            if (!latch.await(timeout.toMillis(), TimeUnit.MILLISECONDS)) {
                return result.toString() + " [TIMEOUT]";
            }
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return result.toString() + " [INTERRUPTED]";
        }

        if (error.get() != null) {
            throw new RuntimeException("Stream error", error.get());
        }

        return result.toString();
    }
}

Merging Streaming Responses

import org.springframework.ai.ollama.api.OllamaApiHelper;

public class StreamMerger {

    public OllamaApi.ChatResponse accumulateStream(Flux<OllamaApi.ChatResponse> stream) {
        // Reactor rejects a null seed, so merge successive chunks pairwise instead
        return stream
            .reduce(OllamaApiHelper::merge)
            .block();
    }

    public void processStreamWithMerging(OllamaApi ollamaApi, ChatRequest request) {
        Flux<OllamaApi.ChatResponse> stream = ollamaApi.streamingChat(request);
        
        OllamaApi.ChatResponse accumulated = null;
        
        for (OllamaApi.ChatResponse chunk : stream.toIterable()) {
            if (accumulated == null) {
                accumulated = chunk;
            } else {
                accumulated = OllamaApiHelper.merge(accumulated, chunk);
            }

            // Check if streaming is done
            if (OllamaApiHelper.isStreamingDone(chunk)) {
                // Final chunk includes complete metadata
                System.out.println("Total tokens: " + chunk.evalCount());
                break;
            }

            // Check for tool calls
            if (OllamaApiHelper.isStreamingToolCall(chunk)) {
                // Handle tool call
            }
        }
    }
}
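
A usage sketch for the accumulator above (assuming an OllamaApi instance and a ChatRequest built elsewhere):

OllamaApi.ChatResponse complete = new StreamMerger()
    .accumulateStream(ollamaApi.streamingChat(request));

// The merged response carries the final metadata from the last chunk
System.out.println("Total tokens: " + complete.evalCount());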

Tool Calling Edge Cases

Tool Execution Failures

public class RobustToolCalling {

    public String chatWithToolErrorHandling(OllamaChatModel chatModel, String message) {
        try {
            ChatResponse response = chatModel.call(new Prompt(message));
            return response.getResult().getOutput().getContent();
        } catch (Exception e) {
            if (e.getMessage() != null && e.getMessage().contains("tool execution failed")) {
                // Tool failed - provide fallback response
                return "I encountered an error accessing external tools. " +
                       "Please try again or rephrase your question.";
            }
            throw e;
        }
    }
}

Manual Tool Execution with Validation

public class ValidatingToolExecutor {

    public ChatResponse executeToolsManually(
        OllamaChatModel chatModel,
        String message,
        Predicate<ToolCall> validator
    ) {
        // Disable auto-execution
        OllamaChatOptions options = OllamaChatOptions.builder()
            .internalToolExecutionEnabled(false)
            .build();

        ChatResponse response = chatModel.call(new Prompt(message, options));
        AssistantMessage assistantMessage = response.getResult().getOutput();

        // Check for tool calls
        List<ToolCall> toolCalls = assistantMessage.getToolCalls();
        if (toolCalls == null || toolCalls.isEmpty()) {
            return response;
        }

        // Validate and execute tools
        List<Message> messages = new ArrayList<>();
        messages.add(new UserMessage(message));
        messages.add(assistantMessage);

        for (ToolCall toolCall : toolCalls) {
            if (!validator.test(toolCall)) {
                // Tool not allowed - return an error payload as the tool response
                messages.add(new ToolResponseMessage(List.of(new ToolResponseMessage.ToolResponse(
                    toolCall.id(), toolCall.name(), "{\"error\": \"Tool not allowed\"}"))));
                continue;
            }

            try {
                // Execute tool
                String result = executeToolSafely(toolCall);
                messages.add(new ToolResponseMessage(List.of(new ToolResponseMessage.ToolResponse(
                    toolCall.id(), toolCall.name(), result))));
            } catch (Exception e) {
                messages.add(new ToolResponseMessage(List.of(new ToolResponseMessage.ToolResponse(
                    toolCall.id(), toolCall.name(), "{\"error\": \"" + e.getMessage() + "\"}"))));
            }
        }

        // Continue conversation with tool results
        return chatModel.call(new Prompt(messages));
    }

    private String executeToolSafely(ToolCall toolCall) {
        // Execute with timeout and error handling
        // ... implementation
        return "{}";
    }
}
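
For example (the tool names below are illustrative), the validator can be a simple allowlist of tools the caller is permitted to trigger:

Set<String> allowedTools = Set.of("getCurrentWeather", "lookupOrderStatus");

ChatResponse response = new ValidatingToolExecutor().executeToolsManually(
    chatModel,
    "What's the weather in Paris?",
    toolCall -> allowedTools.contains(toolCall.name())
);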

Model Management Edge Cases

Model Pull with Progress Tracking

public class ModelPullTracker {

    private final OllamaApi ollamaApi;

    public void pullModelWithProgress(String modelName) {
        PullModelRequest request = new PullModelRequest(modelName);
        Flux<ProgressResponse> progress = ollamaApi.pullModel(request);

        AtomicLong lastCompleted = new AtomicLong(0);
        
        progress.subscribe(
            p -> {
                if (p.total() != null && p.completed() != null) {
                    double percent = (p.completed() * 100.0) / p.total();
                    
                    // Only log on significant progress
                    if (p.completed() - lastCompleted.get() > p.total() / 20) {
                        System.out.printf("Progress: %.1f%% - %s%n", percent, p.status());
                        lastCompleted.set(p.completed());
                    }
                } else {
                    System.out.println("Status: " + p.status());
                }
            },
            error -> System.err.println("Pull failed: " + error.getMessage()),
            () -> System.out.println("Pull complete!")
        );
    }
}

Handling Model Pull Timeout

public class TimeoutAwareModelManager {

    public boolean pullModelWithTimeout(
        OllamaModelManager manager,
        String modelName,
        Duration timeout
    ) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        
        Future<Void> future = executor.submit(() -> {
            manager.pullModel(modelName, PullModelStrategy.ALWAYS);
            return null;
        });

        try {
            future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
            return true;
        } catch (TimeoutException e) {
            future.cancel(true);
            logger.error("Model pull timed out after {}", timeout);
            return false;
        } catch (Exception e) {
            logger.error("Model pull failed", e);
            return false;
        } finally {
            executor.shutdown();
        }
    }
}
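
A usage sketch (the model name and timeout are illustrative):

OllamaModelManager manager = new OllamaModelManager(ollamaApi);
boolean pulled = new TimeoutAwareModelManager()
    .pullModelWithTimeout(manager, "llama3", Duration.ofMinutes(15));

if (!pulled) {
    // Fall back to a model that is already available locally
}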

Model Version Conflicts

public class ModelVersionManager {

    public void ensureModelVersion(OllamaApi ollamaApi, String modelName) {
        // Check if model exists
        ListModelResponse response = ollamaApi.listModels();
        
        Optional<Model> existing = response.models().stream()
            .filter(m -> m.name().startsWith(modelName))
            .findFirst();

        if (existing.isPresent()) {
            Model model = existing.get();
            Instant modifiedAt = model.modifiedAt();
            
            // Check if model is old (e.g., > 30 days)
            if (modifiedAt.isBefore(Instant.now().minus(Duration.ofDays(30)))) {
                logger.info("Model {} is outdated, pulling latest", modelName);
                
                // Pull latest version
                PullModelRequest request = new PullModelRequest(modelName);
                ollamaApi.pullModel(request).blockLast();
            }
        } else {
            // Model doesn't exist, pull it
            PullModelRequest request = new PullModelRequest(modelName);
            ollamaApi.pullModel(request).blockLast();
        }
    }
}

Error Recovery Patterns

Retry with Fallback Model

@Service
public class ResilientChatService {

    private final OllamaChatModel primaryModel;
    private final OllamaChatModel fallbackModel;

    public ResilientChatService(OllamaApi ollamaApi) {
        // Primary: Large model
        this.primaryModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.LLAMA3.id())
                .build())
            .build();

        // Fallback: Smaller, faster model
        this.fallbackModel = OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model(OllamaModel.QWEN_3_06B.id())
                .build())
            .build();
    }

    public String chat(String message) {
        try {
            return chatWithPrimary(message);
        } catch (HttpClientErrorException.NotFound e) {
            logger.warn("Primary model not found, using fallback");
            return chatWithFallback(message);
        } catch (HttpServerErrorException e) {
            logger.warn("Primary model error, using fallback");
            return chatWithFallback(message);
        } catch (ResourceAccessException e) {
            logger.warn("Network error with primary, using fallback");
            return chatWithFallback(message);
        }
    }

    private String chatWithPrimary(String message) {
        ChatResponse response = primaryModel.call(new Prompt(message));
        return response.getResult().getOutput().getContent();
    }

    private String chatWithFallback(String message) {
        ChatResponse response = fallbackModel.call(new Prompt(message));
        return response.getResult().getOutput().getContent();
    }
}

Circuit Breaker Pattern

public class CircuitBreakerChatService {

    private final OllamaChatModel chatModel;
    private final AtomicInteger failureCount = new AtomicInteger(0);
    private final AtomicBoolean circuitOpen = new AtomicBoolean(false);
    private final int FAILURE_THRESHOLD = 5;
    private final Duration RESET_TIMEOUT = Duration.ofMinutes(1);
    private Instant lastFailureTime;

    public Optional<String> chat(String message) {
        // Check circuit breaker
        if (circuitOpen.get()) {
            if (Duration.between(lastFailureTime, Instant.now()).compareTo(RESET_TIMEOUT) > 0) {
                // Try to reset circuit
                circuitOpen.set(false);
                failureCount.set(0);
            } else {
                return Optional.empty();  // Circuit open
            }
        }

        try {
            ChatResponse response = chatModel.call(new Prompt(message));
            
            // Success - reset failure count
            failureCount.set(0);
            
            return Optional.of(response.getResult().getOutput().getContent());
        } catch (Exception e) {
            // Failure - increment counter
            int failures = failureCount.incrementAndGet();
            lastFailureTime = Instant.now();

            if (failures >= FAILURE_THRESHOLD) {
                circuitOpen.set(true);
                logger.error("Circuit breaker opened after {} failures", failures);
            }

            return Optional.empty();
        }
    }

    public boolean isCircuitOpen() {
        return circuitOpen.get();
    }
}
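
Callers can then decide on their own fallback when the circuit is open or a call fails (a sketch):

String reply = circuitBreakerChatService.chat(userMessage)
    .orElse("The assistant is temporarily unavailable. Please try again shortly.");

if (circuitBreakerChatService.isCircuitOpen()) {
    // Optionally surface degraded-mode status to monitoring or the UI
}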

Resource Exhaustion Scenarios

GPU Memory Management

public class GPUMemoryManager {

    public OllamaChatModel createMemoryEfficientModel(OllamaApi ollamaApi) {
        return OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model("llama3")
                .numGPU(0)           // CPU-only
                .lowVRAM(true)       // Enable low VRAM mode
                .numCtx(2048)        // Smaller context
                .numBatch(256)       // Smaller batch size
                .keepAlive("1m")     // Unload quickly
                .build())
            .build();
    }

    public OllamaChatModel createGPUOptimizedModel(OllamaApi ollamaApi) {
        return OllamaChatModel.builder()
            .ollamaApi(ollamaApi)
            .defaultOptions(OllamaChatOptions.builder()
                .model("llama3")
                .numGPU(-1)          // Use all GPU layers
                .numCtx(8192)        // Large context
                .numBatch(1024)      // Large batch
                .useMLock(true)      // Lock in RAM
                .keepAlive("30m")    // Keep loaded
                .build())
            .build();
    }
}

Disk Space Management

public class DiskSpaceManager {

    public void cleanupOldModels(OllamaApi ollamaApi, long maxTotalSizeBytes) {
        ListModelResponse response = ollamaApi.listModels();
        
        // Sort by last modified (oldest first)
        List<Model> sortedModels = response.models().stream()
            .sorted(Comparator.comparing(Model::modifiedAt))
            .toList();

        long totalSize = sortedModels.stream()
            .mapToLong(Model::size)
            .sum();

        // Delete oldest models until under limit
        OllamaModelManager manager = new OllamaModelManager(ollamaApi);
        
        for (Model model : sortedModels) {
            if (totalSize <= maxTotalSizeBytes) {
                break;
            }

            logger.info("Deleting old model: {} (size: {} MB)",
                model.name(), model.size() / (1024 * 1024));
            
            manager.deleteModel(model.name());
            totalSize -= model.size();
        }
    }
}
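
For example (the 50 GB budget is illustrative), keep the local model cache under a fixed size:

long maxCacheSize = 50L * 1024 * 1024 * 1024;  // 50 GB
new DiskSpaceManager().cleanupOldModels(ollamaApi, maxCacheSize);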

Network Failure Recovery

Exponential Backoff Retry

public class ExponentialBackoffRetry {

    public <T> T executeWithRetry(
        Supplier<T> operation,
        int maxAttempts,
        Duration initialDelay
    ) {
        int attempt = 0;
        Duration delay = initialDelay;

        while (attempt < maxAttempts) {
            try {
                return operation.get();
            } catch (ResourceAccessException e) {
                attempt++;
                
                if (attempt >= maxAttempts) {
                    throw new RuntimeException("Max retry attempts exceeded", e);
                }

                logger.warn("Attempt {} failed, retrying in {}", attempt, delay);
                
                try {
                    Thread.sleep(delay.toMillis());
                } catch (InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    throw new RuntimeException("Interrupted during retry", ie);
                }

                // Exponential backoff
                delay = delay.multipliedBy(2);
            }
        }

        throw new RuntimeException("Should not reach here");
    }

    // Usage
    public String chatWithRetry(OllamaChatModel chatModel, String message) {
        return executeWithRetry(
            () -> {
                ChatResponse response = chatModel.call(new Prompt(message));
                return response.getResult().getOutput().getContent();
            },
            5,  // Max 5 attempts
            Duration.ofSeconds(1)  // Start with 1 second
        );
    }
}

Connection Pool Exhaustion

public class ConnectionPoolManager {

    public OllamaApi createPooledApi(int maxConnections) {
        // Hint that connections should be kept alive for reuse; actual pool
        // sizing belongs on the underlying HTTP client
        // (see the pooled request factory sketch after this example)
        RestClient.Builder restClientBuilder = RestClient.builder()
            .requestInterceptor((request, body, execution) -> {
                request.getHeaders().add("Connection", "keep-alive");
                return execution.execute(request, body);
            });

        // Configure WebClient codecs (raise the in-memory buffer for large streaming responses)
        WebClient.Builder webClientBuilder = WebClient.builder()
            .codecs(configurer -> 
                configurer.defaultCodecs().maxInMemorySize(16 * 1024 * 1024)
            );

        return OllamaApi.builder()
            .baseUrl("http://localhost:11434")
            .restClientBuilder(restClientBuilder)
            .webClientBuilder(webClientBuilder)
            .build();
    }
}
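
If pool exhaustion is a real concern, the pool itself has to be configured on the HTTP client behind RestClient. A minimal sketch, assuming Apache HttpClient 5 is on the classpath, which also puts maxConnections to use:

PoolingHttpClientConnectionManager connectionManager =
    PoolingHttpClientConnectionManagerBuilder.create()
        .setMaxConnTotal(maxConnections)     // total connections in the pool
        .setMaxConnPerRoute(maxConnections)  // all requests target the single Ollama host
        .build();

CloseableHttpClient httpClient = HttpClients.custom()
    .setConnectionManager(connectionManager)
    .build();

RestClient.Builder pooledRestClientBuilder = RestClient.builder()
    .requestFactory(new HttpComponentsClientHttpRequestFactory(httpClient));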

Timeout Handling

Request-Level Timeouts

public class TimeoutHandler {

    public Optional<String> chatWithTimeout(
        OllamaChatModel chatModel,
        String message,
        Duration timeout
    ) {
        ExecutorService executor = Executors.newSingleThreadExecutor();
        
        Future<String> future = executor.submit(() -> {
            ChatResponse response = chatModel.call(new Prompt(message));
            return response.getResult().getOutput().getContent();
        });

        try {
            String result = future.get(timeout.toMillis(), TimeUnit.MILLISECONDS);
            return Optional.of(result);
        } catch (TimeoutException e) {
            future.cancel(true);
            logger.error("Request timed out after {}", timeout);
            return Optional.empty();
        } catch (Exception e) {
            logger.error("Request failed", e);
            return Optional.empty();
        } finally {
            executor.shutdown();
        }
    }
}

Streaming Timeout

public class StreamingTimeoutHandler {

    public String streamWithTimeout(
        OllamaChatModel chatModel,
        String message,
        Duration timeout
    ) {
        StringBuilder result = new StringBuilder();
        CountDownLatch latch = new CountDownLatch(1);
        AtomicBoolean timedOut = new AtomicBoolean(false);

        Flux<ChatResponse> stream = chatModel.stream(new Prompt(message))
            .timeout(timeout)
            .onErrorResume(TimeoutException.class, e -> {
                timedOut.set(true);
                return Flux.empty();
            });

        stream.subscribe(
            chunk -> result.append(chunk.getResult().getOutput().getContent()),
            error -> latch.countDown(),
            latch::countDown
        );

        try {
            latch.await();
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
        }

        if (timedOut.get()) {
            return result.toString() + " [TIMEOUT - Partial response]";
        }

        return result.toString();
    }
}

Boundary Conditions

Maximum Token Generation

// Test maximum token generation
OllamaChatOptions options = OllamaChatOptions.builder()
    .model("llama3")
    .numPredict(-1)  // -1 = unlimited generation (stops at a stop sequence or when the model finishes)
    .build();

// Or use -2 to fill entire context
OllamaChatOptions fillContextOptions = OllamaChatOptions.builder()
    .model("llama3")
    .numPredict(-2)  // Fill context window
    .numCtx(4096)
    .build();

Temperature Boundaries

// Minimum temperature (deterministic)
OllamaChatOptions deterministicOptions = OllamaChatOptions.builder()
    .model("llama3")
    .temperature(0.0)
    .seed(42)  // Fixed seed for reproducibility
    .build();

// Maximum temperature (highly creative)
OllamaChatOptions creativeOptions = OllamaChatOptions.builder()
    .model("llama3")
    .temperature(2.0)
    .build();

// Invalid temperature (will use default or error)
try {
    OllamaChatOptions invalidOptions = OllamaChatOptions.builder()
        .model("llama3")
        .temperature(-1.0)  // Invalid
        .build();
} catch (Exception e) {
    // Handle validation error
}

Context Window Boundaries

// Minimum context
OllamaChatOptions minContextOptions = OllamaChatOptions.builder()
    .model("llama3")
    .numCtx(128)  // Very small context
    .build();

// Maximum context (model-dependent)
OllamaChatOptions maxContextOptions = OllamaChatOptions.builder()
    .model(OllamaModel.MISTRAL_NEMO.id())
    .numCtx(131072)  // 128k tokens
    .build();

Advanced Scenarios

Multi-Model Comparison

public class ModelComparator {

    public record ComparisonResult(
        String modelName,
        String response,
        Duration responseTime,
        Integer tokenCount
    ) {}

    public List<ComparisonResult> compareModels(
        OllamaApi ollamaApi,
        String prompt,
        List<OllamaModel> models
    ) {
        List<ComparisonResult> results = new ArrayList<>();

        for (OllamaModel model : models) {
            OllamaChatModel chatModel = OllamaChatModel.builder()
                .ollamaApi(ollamaApi)
                .defaultOptions(OllamaChatOptions.builder()
                    .model(model.id())
                    .temperature(0.7)
                    .build())
                .build();

            Instant start = Instant.now();
            
            try {
                ChatResponse response = chatModel.call(new Prompt(prompt));
                Duration responseTime = Duration.between(start, Instant.now());
                
                results.add(new ComparisonResult(
                    model.id(),
                    response.getResult().getOutput().getContent(),
                    responseTime,
                    response.getMetadata().getUsage().getTotalTokens()
                ));
            } catch (Exception e) {
                logger.error("Model {} failed: {}", model.id(), e.getMessage());
            }
        }

        return results;
    }
}
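
A usage sketch (the prompt and model list are illustrative):

List<ModelComparator.ComparisonResult> results = new ModelComparator().compareModels(
    ollamaApi,
    "Explain the difference between a mutex and a semaphore in two sentences.",
    List.of(OllamaModel.LLAMA3, OllamaModel.MISTRAL)
);

results.forEach(r -> System.out.printf("%s: %d ms, %d tokens%n",
    r.modelName(), r.responseTime().toMillis(), r.tokenCount()));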

Dynamic Model Selection

public class DynamicModelSelector {

    private final OllamaApi ollamaApi;

    public String chatWithBestAvailableModel(String message) {
        // Try models in order of preference
        List<String> preferredModels = List.of(
            "llama3:70b",
            "llama3",
            "mistral",
            "qwen3:0.6b"
        );

        OllamaModelManager manager = new OllamaModelManager(ollamaApi);

        for (String modelName : preferredModels) {
            if (manager.isModelAvailable(modelName)) {
                OllamaChatModel chatModel = OllamaChatModel.builder()
                    .ollamaApi(ollamaApi)
                    .defaultOptions(OllamaChatOptions.builder()
                        .model(modelName)
                        .build())
                    .build();

                try {
                    ChatResponse response = chatModel.call(new Prompt(message));
                    return response.getResult().getOutput().getContent();
                } catch (Exception e) {
                    logger.warn("Model {} failed, trying next", modelName);
                    continue;
                }
            }
        }

        throw new RuntimeException("No available models");
    }
}

For more examples, see:

  • Real-World Scenarios - Comprehensive usage examples
  • Quick Start Guide - Getting started
  • Reference Documentation - Complete API reference