Spring Boot Starter for OpenAI integration providing auto-configuration for chat completion, embeddings, image generation, audio speech synthesis, audio transcription, and content moderation models. Includes high-level ChatClient API and conversation memory support.
The OpenAiAudioSpeechModel converts text to speech audio with multiple voice options and output formats.
import org.springframework.ai.openai.OpenAiAudioSpeechModel;
import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
import org.springframework.ai.openai.api.OpenAiAudioApi;
import org.springframework.ai.model.TextToSpeechModel;
import org.springframework.ai.model.TextToSpeechPrompt;
import org.springframework.ai.model.TextToSpeechResponse;
import org.springframework.ai.model.Speech;package org.springframework.ai.openai;
/**
 * Text-to-speech model that converts text to speech audio; implements TextToSpeechModel.
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public class OpenAiAudioSpeechModel implements TextToSpeechModel {
// Generate speech from plain text; the result is wrapped in a TextToSpeechResponse.
public TextToSpeechResponse call(String text);
// Generate speech from a prompt, which can carry TextToSpeechOptions alongside the text.
public TextToSpeechResponse call(TextToSpeechPrompt prompt);
// Stream speech generation: responses are delivered through a Flux of TextToSpeechResponse.
public Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt);
}package org.springframework.ai.openai;
/**
 * Options for a speech request: model, input text, voice, response format and speed.
 * The same five options appear as spring.ai.openai.audio.speech.options.* properties.
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public class OpenAiAudioSpeechOptions implements TextToSpeechOptions {
// Builder methods
// Entry point for fluent construction; returns an OpenAiAudioSpeechOptions.Builder.
public static OpenAiAudioSpeechOptions.Builder builder();
// Getters (one per option)
public String getModel();
public String getInput();
public OpenAiAudioApi.SpeechRequest.Voice getVoice();
public OpenAiAudioApi.SpeechRequest.AudioResponseFormat getResponseFormat();
// Speed is a boxed Double, so it may be unset — TODO confirm how a null speed is treated.
public Double getSpeed();
// Setters (one per option, mirroring the getters)
public void setModel(String model);
public void setInput(String input);
public void setVoice(OpenAiAudioApi.SpeechRequest.Voice voice);
public void setResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat responseFormat);
public void setSpeed(Double speed);
}import org.springframework.ai.openai.OpenAiAudioSpeechModel;
import org.springframework.ai.model.TextToSpeechResponse;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
/**
 * Minimal text-to-speech service built on an injected {@link OpenAiAudioSpeechModel}.
 */
@Service
public class SpeechService {

    private final OpenAiAudioSpeechModel speechModel;

    /**
     * Creates the service.
     *
     * @param speechModel the speech model used for every synthesis call
     */
    public SpeechService(OpenAiAudioSpeechModel speechModel) {
        this.speechModel = speechModel;
    }

    /**
     * Synthesizes the given text.
     *
     * @param text the text to convert to speech
     * @return the audio bytes of the model's result
     */
    public byte[] textToSpeech(String text) {
        return speechModel.call(text).getResult().getOutput();
    }

    /**
     * Synthesizes the given text and writes the audio to a file.
     *
     * @param text       the text to convert to speech
     * @param outputPath the path of the file to write
     * @throws IOException if the file cannot be written
     */
    public void textToSpeechFile(String text, String outputPath) throws IOException {
        // Synthesize first, then resolve the path — same order as a plain two-step call.
        byte[] audio = textToSpeech(text);
        Path target = Path.of(outputPath);
        Files.write(target, audio);
    }
}
import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;
import org.springframework.ai.model.TextToSpeechPrompt;
/**
 * Synthesizes the given text with explicit options: ALLOY voice, speed 1.0, MP3 output.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] textToSpeechWithOptions(String text) {
    OpenAiAudioSpeechOptions speechOptions = OpenAiAudioSpeechOptions.builder()
            .voice(Voice.ALLOY)
            .speed(1.0)
            .responseFormat(AudioResponseFormat.MP3)
            .build();
    return speechModel.call(new TextToSpeechPrompt(text, speechOptions))
            .getResult()
            .getOutput();
}
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;
/**
 * Synthesizes the given text using the requested voice; all other options are left unset.
 *
 * @param text  the text to convert to speech
 * @param voice the voice to use
 * @return the generated audio bytes
 */
public byte[] generateWithVoice(String text, Voice voice) {
    OpenAiAudioSpeechOptions voiceOnly = OpenAiAudioSpeechOptions.builder()
            .voice(voice)
            .build();
    return speechModel.call(new TextToSpeechPrompt(text, voiceOnly))
            .getResult()
            .getOutput();
}
/**
 * Usage example for the different voices: renders the same text once per voice and
 * writes each result to a file named after the voice in the working directory.
 *
 * @param text the text to convert to speech
 * @throws IOException if any of the files cannot be written
 */
public void demonstrateVoices(String text) throws IOException {
    // Parallel tables: fileNames[i] receives the audio produced with voices[i].
    String[] fileNames = {"alloy.mp3", "echo.mp3", "fable.mp3", "onyx.mp3", "nova.mp3", "shimmer.mp3"};
    Voice[] voices = {Voice.ALLOY, Voice.ECHO, Voice.FABLE, Voice.ONYX, Voice.NOVA, Voice.SHIMMER};
    for (int i = 0; i < voices.length; i++) {
        Files.write(Path.of(fileNames[i]), generateWithVoice(text, voices[i]));
    }
}
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;
/**
 * Synthesizes the given text and requests MP3 output.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateMP3(String text) {
    OpenAiAudioSpeechOptions mp3Options = OpenAiAudioSpeechOptions.builder()
            .responseFormat(AudioResponseFormat.MP3)
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, mp3Options));
    return response.getResult().getOutput();
}
/**
 * Synthesizes the given text and requests Opus output.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateOpus(String text) {
    OpenAiAudioSpeechOptions opusOptions = OpenAiAudioSpeechOptions.builder()
            .responseFormat(AudioResponseFormat.OPUS)
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, opusOptions));
    return response.getResult().getOutput();
}
/**
 * Synthesizes the given text and requests AAC output.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateAAC(String text) {
    OpenAiAudioSpeechOptions aacOptions = OpenAiAudioSpeechOptions.builder()
            .responseFormat(AudioResponseFormat.AAC)
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, aacOptions));
    return response.getResult().getOutput();
}
/**
 * Synthesizes the given text and requests FLAC output.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateFLAC(String text) {
    OpenAiAudioSpeechOptions flacOptions = OpenAiAudioSpeechOptions.builder()
            .responseFormat(AudioResponseFormat.FLAC)
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, flacOptions));
    return response.getResult().getOutput();
}

/**
 * Synthesizes the given text at 1.5x speed.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateFastSpeech(String text) {
    OpenAiAudioSpeechOptions fastOptions = OpenAiAudioSpeechOptions.builder()
            .speed(1.5) // 1.5x speed
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, fastOptions));
    return response.getResult().getOutput();
}
/**
 * Synthesizes the given text at 0.75x speed.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 */
public byte[] generateSlowSpeech(String text) {
    OpenAiAudioSpeechOptions slowOptions = OpenAiAudioSpeechOptions.builder()
            .speed(0.75) // 0.75x speed
            .build();
    TextToSpeechResponse response = speechModel.call(new TextToSpeechPrompt(text, slowOptions));
    return response.getResult().getOutput();
}
import java.io.FileOutputStream;
import java.io.OutputStream;
/**
 * Synthesizes the given text and writes the audio to a file through an output stream.
 * The audio is obtained with a single blocking call and written in one piece.
 *
 * @param text       the text to convert to speech
 * @param outputPath the path of the file to write
 * @throws IOException if the file cannot be opened or written
 */
public void streamToFile(String text, String outputPath) throws IOException {
    byte[] audio = speechModel.call(text).getResult().getOutput();
    try (OutputStream out = new FileOutputStream(outputPath)) {
        out.write(audio);
    }
}
import reactor.core.publisher.Flux;
/**
 * Streams synthesized speech for the given text (NOVA voice) into a file, writing each
 * chunk as it is emitted. Blocks the calling thread until the stream has finished.
 *
 * @param text       the text to convert to speech
 * @param outputPath the path of the file to write
 * @throws IOException if the file cannot be opened or a chunk cannot be written
 */
public void streamSpeechToFile(String text, String outputPath) throws IOException {
    OpenAiAudioSpeechOptions options = OpenAiAudioSpeechOptions.builder()
            .voice(Voice.NOVA)
            .build();
    TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options);
    Flux<TextToSpeechResponse> stream = speechModel.stream(prompt);
    try (FileOutputStream fos = new FileOutputStream(outputPath)) {
        stream.doOnNext(response -> {
            try {
                byte[] chunk = response.getResult().getOutput();
                fos.write(chunk);
            } catch (IOException e) {
                // A lambda cannot throw a checked exception; tunnel it out and unwrap below.
                throw new java.io.UncheckedIOException(e);
            }
        }).blockLast();
    } catch (java.io.UncheckedIOException e) {
        // Report write failures as the IOException this method declares, so callers
        // handle open and write errors the same way.
        throw e.getCause();
    }
}
Configure via application.properties:
# API Connection
spring.ai.openai.audio.speech.api-key=sk-...
spring.ai.openai.audio.speech.base-url=https://api.openai.com
spring.ai.openai.audio.speech.project-id=proj_...
spring.ai.openai.audio.speech.organization-id=org-...
# Model Configuration
spring.ai.openai.audio.speech.options.model=gpt-4o-mini-tts
spring.ai.openai.audio.speech.options.input=Your text here
spring.ai.openai.audio.speech.options.voice=alloy
spring.ai.openai.audio.speech.options.response-format=mp3
spring.ai.openai.audio.speech.options.speed=1.0

Models:
- gpt-4o-mini-tts - Fast and efficient (default)
- tts-1 - Standard quality
- tts-1-hd - High definition audio

Voices:
- ALLOY - Neutral and balanced (default)
- ECHO - Male voice
- FABLE - British accent
- ONYX - Deep male voice
- NOVA - Female voice
- SHIMMER - Soft female voice

Response formats:
- MP3 - MPEG audio (default, widely supported)
- OPUS - Opus codec (efficient for streaming)
- AAC - Advanced Audio Coding (iOS/Apple)
- FLAC - Lossless audio (highest quality)

Speed:
- 0.25 (quarter speed)
- 1.0 (normal speed)
- 4.0 (quadruple speed)

import org.springframework.ai.openai.api.OpenAiApiException;
/**
 * Synthesizes the given text, converting API failures into a {@link RuntimeException}
 * that keeps the original exception as its cause.
 *
 * @param text the text to convert to speech
 * @return the generated audio bytes
 * @throws RuntimeException if the call fails with an {@code OpenAiApiException}
 */
public byte[] safeTextToSpeech(String text) {
    try {
        return speechModel.call(text).getResult().getOutput();
    } catch (OpenAiApiException apiError) {
        // Handle API errors (rate limits, invalid API key, etc.)
        String message = "Speech generation failed: " + apiError.getMessage();
        throw new RuntimeException(message, apiError);
    }
}
package org.springframework.ai.openai;
/**
 * Fluent builder for OpenAiAudioSpeechOptions. Every option method returns Builder so
 * calls can be chained; build() produces the OpenAiAudioSpeechOptions.
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public static class Builder {
// One method per option: model, input, voice, responseFormat, speed.
public Builder model(String model);
public Builder input(String input);
public Builder voice(OpenAiAudioApi.SpeechRequest.Voice voice);
public Builder responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat responseFormat);
public Builder speed(Double speed);
// Finishes the chain and returns the options object.
public OpenAiAudioSpeechOptions build();
}package org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest;
/**
 * Voices available for speech generation. Each constant carries a lowercase string
 * value; presumably this is the value used on the wire and in configuration
 * (e.g. options.voice=alloy) — confirm against the API class.
 */
public enum Voice {
// Neutral and balanced (default)
ALLOY("alloy"),
// Male voice
ECHO("echo"),
// British accent
FABLE("fable"),
// Deep male voice
ONYX("onyx"),
// Female voice
NOVA("nova"),
// Soft female voice
SHIMMER("shimmer");
}package org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest;
/**
 * Audio output formats for generated speech. Each constant carries a lowercase string
 * value; presumably this is the value used in configuration
 * (e.g. options.response-format=mp3) — confirm against the API class.
 */
public enum AudioResponseFormat {
// MPEG audio (default, widely supported)
MP3("mp3"),
// Opus codec (efficient for streaming)
OPUS("opus"),
// Advanced Audio Coding (iOS/Apple)
AAC("aac"),
// Lossless audio (highest quality)
FLAC("flac");
}package org.springframework.ai.model;
/**
 * Request object for a text-to-speech call: the text to speak plus optional
 * TextToSpeechOptions.
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public class TextToSpeechPrompt {
// Prompt with text only.
public TextToSpeechPrompt(String text);
// Prompt with text and per-request options (e.g. an OpenAiAudioSpeechOptions).
public TextToSpeechPrompt(String text, TextToSpeechOptions options);
// Accessors for the two constructor arguments.
public String getText();
public TextToSpeechOptions getOptions();
}package org.springframework.ai.model;
/**
 * Result of a text-to-speech call. The usage examples read the audio through
 * getResult().getOutput().
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public class TextToSpeechResponse {
// Single Speech result.
public Speech getResult();
// All Speech results as a list.
public List<Speech> getResults();
// Metadata attached to the response as a whole.
public TextToSpeechResponseMetadata getMetadata();
}package org.springframework.ai.model;
/**
 * One generated speech result.
 * This is a signature summary only: method bodies are omitted in this listing.
 */
public class Speech {
// Raw audio bytes; the usage examples write these directly to audio files.
public byte[] getOutput();
// Metadata attached to this individual result.
public SpeechMetadata getMetadata();
}tessl i tessl/maven-org-springframework-ai--spring-ai-starter-model-openai@1.1.1