CtrlK
Community | Documentation | Log in | Get started
Tessl Logo

tessl/maven-org-springframework-ai--spring-ai-starter-model-openai

Spring Boot Starter for OpenAI integration providing auto-configuration for chat completion, embeddings, image generation, audio speech synthesis, audio transcription, and content moderation models. Includes high-level ChatClient API and conversation memory support.

Overview
Eval results
Files

docs/reference/audio-speech-model.md

Audio Speech Model (Text-to-Speech)

The OpenAiAudioSpeechModel converts text to speech audio with multiple voice options and output formats.

Import

import org.springframework.ai.openai.OpenAiAudioSpeechModel;
import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
import org.springframework.ai.openai.api.OpenAiAudioApi;
import org.springframework.ai.model.TextToSpeechModel;
import org.springframework.ai.model.TextToSpeechPrompt;
import org.springframework.ai.model.TextToSpeechResponse;
import org.springframework.ai.model.Speech;

API

OpenAiAudioSpeechModel

package org.springframework.ai.openai;

/**
 * Text-to-speech model that converts text into speech audio.
 * Signature listing only: method bodies are intentionally omitted.
 */
public class OpenAiAudioSpeechModel implements TextToSpeechModel {
    // Generate speech from plain text, without supplying per-request options.
    // NOTE(review): confirm this overload's return type against the library
    // Javadoc for the version in use before relying on it.
    public TextToSpeechResponse call(String text);

    // Generate speech for a prompt, which may carry per-request options.
    public TextToSpeechResponse call(TextToSpeechPrompt prompt);

    // Stream speech generation: responses are emitted through a Flux, each one
    // exposing its audio bytes via getResult().getOutput().
    public Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt);
}

OpenAiAudioSpeechOptions

package org.springframework.ai.openai;

/**
 * Options for OpenAI speech generation: model, input, voice, response format
 * and speed. Signature listing only: method bodies are intentionally omitted.
 */
public class OpenAiAudioSpeechOptions implements TextToSpeechOptions {
    // Builder methods
    // Entry point for fluent construction; finish the chain with build().
    public static OpenAiAudioSpeechOptions.Builder builder();

    // Getters
    // Model name; documented values are gpt-4o-mini-tts (default), tts-1 and tts-1-hd.
    public String getModel();
    // Input text option (presumably the text to synthesize; confirm against the library Javadoc).
    public String getInput();
    // Voice used for synthesis; ALLOY is the documented default.
    public OpenAiAudioApi.SpeechRequest.Voice getVoice();
    // Audio encoding of the generated output; MP3 is the documented default.
    public OpenAiAudioApi.SpeechRequest.AudioResponseFormat getResponseFormat();
    // Speed multiplier; documented range is 0.25 to 4.0 with 1.0 as normal speed.
    public Double getSpeed();

    // Setters
    // Each setter assigns the option read by the getter of the same name.
    public void setModel(String model);
    public void setInput(String input);
    public void setVoice(OpenAiAudioApi.SpeechRequest.Voice voice);
    public void setResponseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat responseFormat);
    public void setSpeed(Double speed);
}

Usage Examples

Basic Text-to-Speech

import org.springframework.ai.openai.OpenAiAudioSpeechModel;
import org.springframework.ai.model.TextToSpeechResponse;
import org.springframework.stereotype.Service;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;

/**
 * Spring service that turns text into speech audio through an injected
 * {@link OpenAiAudioSpeechModel}.
 */
@Service
public class SpeechService {

    private final OpenAiAudioSpeechModel speechModel;

    /**
     * Creates the service.
     *
     * @param speechModel the speech model used for every request
     */
    public SpeechService(OpenAiAudioSpeechModel speechModel) {
        this.speechModel = speechModel;
    }

    /**
     * Generates speech for the given text.
     *
     * @param text the text to convert to speech
     * @return the audio bytes taken from the response's result
     */
    public byte[] textToSpeech(String text) {
        return this.speechModel.call(text).getResult().getOutput();
    }

    /**
     * Generates speech for the given text and writes the audio to a file.
     *
     * @param text the text to convert to speech
     * @param outputPath path of the file that receives the audio bytes
     * @throws IOException if the file cannot be written
     */
    public void textToSpeechFile(String text, String outputPath) throws IOException {
        // Generate the audio first so the path is only resolved once the bytes exist.
        final byte[] generatedAudio = textToSpeech(text);
        final Path destination = Path.of(outputPath);
        Files.write(destination, generatedAudio);
    }
}

Speech with Options

import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;
import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;
import org.springframework.ai.model.TextToSpeechPrompt;

/**
 * Generates speech for the given text with explicit options: the ALLOY voice,
 * speed 1.0 and MP3 output.
 *
 * @param text the text to convert to speech
 * @return the audio bytes taken from the response's result
 */
public byte[] textToSpeechWithOptions(String text) {
    TextToSpeechPrompt request = new TextToSpeechPrompt(
        text,
        OpenAiAudioSpeechOptions.builder()
            .voice(Voice.ALLOY)
            .speed(1.0)
            .responseFormat(AudioResponseFormat.MP3)
            .build());

    return speechModel.call(request).getResult().getOutput();
}

Different Voices

import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice;

/**
 * Generates speech for the given text using the given voice.
 *
 * @param text the text to convert to speech
 * @param voice the voice to synthesize with
 * @return the audio bytes taken from the response's result
 */
public byte[] generateWithVoice(String text, Voice voice) {
    TextToSpeechPrompt request = new TextToSpeechPrompt(
        text,
        OpenAiAudioSpeechOptions.builder()
            .voice(voice)
            .build());

    return speechModel.call(request).getResult().getOutput();
}

/**
 * Usage example for different voices: renders the same text once per voice and
 * writes each result to {@code <voice>.mp3} in the working directory.
 *
 * @param text the text to convert to speech
 * @throws IOException if any of the files cannot be written
 */
public void demonstrateVoices(String text) throws IOException {
    Voice[] voicesInOrder = {
        Voice.ALLOY, Voice.ECHO, Voice.FABLE, Voice.ONYX, Voice.NOVA, Voice.SHIMMER
    };

    for (Voice current : voicesInOrder) {
        // Locale.ROOT keeps the file name independent of the default locale
        // (e.g. "SHIMMER" must become "shimmer" everywhere).
        String fileName = current.name().toLowerCase(java.util.Locale.ROOT) + ".mp3";
        Files.write(Path.of(fileName), generateWithVoice(text, current));
    }
}

Different Audio Formats

import org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat;

/**
 * Generates speech for the given text as MP3 audio.
 *
 * @param text the text to convert to speech
 * @return the MP3 audio bytes
 */
public byte[] generateMP3(String text) {
    return generateInFormat(text, AudioResponseFormat.MP3);
}

/**
 * Generates speech for the given text as Opus audio.
 *
 * @param text the text to convert to speech
 * @return the Opus audio bytes
 */
public byte[] generateOpus(String text) {
    return generateInFormat(text, AudioResponseFormat.OPUS);
}

/**
 * Generates speech for the given text as AAC audio.
 *
 * @param text the text to convert to speech
 * @return the AAC audio bytes
 */
public byte[] generateAAC(String text) {
    return generateInFormat(text, AudioResponseFormat.AAC);
}

/**
 * Generates speech for the given text as FLAC audio.
 *
 * @param text the text to convert to speech
 * @return the FLAC audio bytes
 */
public byte[] generateFLAC(String text) {
    return generateInFormat(text, AudioResponseFormat.FLAC);
}

/**
 * Shared implementation for the format-specific methods: requests speech in
 * the given response format and leaves every other option unset.
 *
 * @param text the text to convert to speech
 * @param format the audio format to request
 * @return the audio bytes taken from the response's result
 */
private byte[] generateInFormat(String text, AudioResponseFormat format) {
    OpenAiAudioSpeechOptions options = OpenAiAudioSpeechOptions.builder()
        .responseFormat(format)
        .build();

    TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options);
    return speechModel.call(prompt).getResult().getOutput();
}

Variable Speed

/**
 * Generates speech for the given text at 1.5x speed.
 *
 * @param text the text to convert to speech
 * @return the audio bytes taken from the response's result
 */
public byte[] generateFastSpeech(String text) {
    TextToSpeechPrompt request = new TextToSpeechPrompt(
        text,
        OpenAiAudioSpeechOptions.builder()
            .speed(1.5) // 1.5x speed
            .build());

    TextToSpeechResponse reply = speechModel.call(request);
    return reply.getResult().getOutput();
}

/**
 * Generates speech for the given text at 0.75x speed.
 *
 * @param text the text to convert to speech
 * @return the audio bytes taken from the response's result
 */
public byte[] generateSlowSpeech(String text) {
    TextToSpeechPrompt request = new TextToSpeechPrompt(
        text,
        OpenAiAudioSpeechOptions.builder()
            .speed(0.75) // 0.75x speed
            .build());

    TextToSpeechResponse reply = speechModel.call(request);
    return reply.getResult().getOutput();
}

Write to File via OutputStream

import java.io.FileOutputStream;
import java.io.OutputStream;

/**
 * Generates speech for the given text and writes it to a file through an
 * {@link OutputStream}. The audio is obtained in full from a single blocking
 * call before anything is written.
 *
 * @param text the text to convert to speech
 * @param outputPath path of the file that receives the audio bytes
 * @throws IOException if the file cannot be opened or written
 */
public void streamToFile(String text, String outputPath) throws IOException {
    byte[] audioData = speechModel.call(text).getResult().getOutput();

    // try-with-resources closes the file even when the write fails.
    try (OutputStream sink = new FileOutputStream(outputPath)) {
        sink.write(audioData);
    }
}

Streaming Speech Generation

import reactor.core.publisher.Flux;

/**
 * Streams speech generation for the given text (NOVA voice) and appends each
 * emitted audio chunk to a file as it arrives.
 *
 * @param text the text to convert to speech
 * @param outputPath path of the file that receives the audio bytes
 * @throws IOException if the output file cannot be opened or closed
 * @throws java.io.UncheckedIOException if writing a chunk fails while the
 *         stream is being consumed; the original {@link IOException} is the cause
 */
public void streamSpeechToFile(String text, String outputPath) throws IOException {
    OpenAiAudioSpeechOptions options = OpenAiAudioSpeechOptions.builder()
        .voice(Voice.NOVA)
        .build();

    TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options);

    Flux<TextToSpeechResponse> stream = speechModel.stream(prompt);

    try (FileOutputStream fos = new FileOutputStream(outputPath)) {
        stream.doOnNext(response -> {
            try {
                byte[] chunk = response.getResult().getOutput();
                fos.write(chunk);
            } catch (IOException e) {
                // A lambda cannot throw the checked IOException, so wrap it in the
                // dedicated unchecked type instead of a bare RuntimeException.
                throw new java.io.UncheckedIOException(e);
            }
        }).blockLast(); // block until the stream completes so the file is written before closing
    }
}

Configuration

Configure via application.properties:

# API Connection
spring.ai.openai.audio.speech.api-key=sk-...
spring.ai.openai.audio.speech.base-url=https://api.openai.com
spring.ai.openai.audio.speech.project-id=proj_...
spring.ai.openai.audio.speech.organization-id=org-...

# Model Configuration
spring.ai.openai.audio.speech.options.model=gpt-4o-mini-tts
spring.ai.openai.audio.speech.options.input=Your text here
spring.ai.openai.audio.speech.options.voice=alloy
spring.ai.openai.audio.speech.options.response-format=mp3
spring.ai.openai.audio.speech.options.speed=1.0

Model Options

Available Models

  • gpt-4o-mini-tts - Fast and efficient (default)
  • tts-1 - Standard quality
  • tts-1-hd - High definition audio

Voice Options

  • ALLOY - Neutral and balanced (default)
  • ECHO - Male voice
  • FABLE - British accent
  • ONYX - Deep male voice
  • NOVA - Female voice
  • SHIMMER - Soft female voice

Audio Response Formats

  • MP3 - MPEG audio (default, widely supported)
  • OPUS - Opus codec (efficient for streaming)
  • AAC - Advanced Audio Coding (iOS/Apple)
  • FLAC - Lossless audio (highest quality)

Speed Range

  • Minimum: 0.25 (quarter speed)
  • Default: 1.0 (normal speed)
  • Maximum: 4.0 (quadruple speed)

Error Handling

import org.springframework.ai.openai.api.OpenAiApiException;

/**
 * Generates speech for the given text, converting API failures into a
 * {@link RuntimeException} that keeps the original exception as its cause.
 *
 * @param text the text to convert to speech
 * @return the audio bytes taken from the response's result
 * @throws RuntimeException if the call fails with an {@code OpenAiApiException}
 */
public byte[] safeTextToSpeech(String text) {
    try {
        return speechModel.call(text).getResult().getOutput();
    } catch (OpenAiApiException apiFailure) {
        // Handle API errors (rate limits, invalid API key, etc.)
        String reason = "Speech generation failed: " + apiFailure.getMessage();
        throw new RuntimeException(reason, apiFailure);
    }
}

Types

Builder

package org.springframework.ai.openai;

/**
 * Fluent builder for OpenAiAudioSpeechOptions, obtained from
 * OpenAiAudioSpeechOptions.builder(). Each option method returns a Builder so
 * calls can be chained. Signature listing only: method bodies are omitted.
 */
public static class Builder {
    // Model name; documented values are gpt-4o-mini-tts (default), tts-1 and tts-1-hd.
    public Builder model(String model);
    // Input text option (presumably the text to synthesize; confirm against the library Javadoc).
    public Builder input(String input);
    // Voice used for synthesis; ALLOY is the documented default.
    public Builder voice(OpenAiAudioApi.SpeechRequest.Voice voice);
    // Audio encoding of the generated output; MP3 is the documented default.
    public Builder responseFormat(OpenAiAudioApi.SpeechRequest.AudioResponseFormat responseFormat);
    // Speed multiplier; documented range is 0.25 to 4.0 with 1.0 as normal speed.
    public Builder speed(Double speed);
    // Ends the chain and returns the configured options object.
    public OpenAiAudioSpeechOptions build();
}

Voice

package org.springframework.ai.openai.api;

// Nested type: Voice is declared inside OpenAiAudioApi.SpeechRequest, so it is
// imported as org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.Voice.
// Signature listing only: the constructor and members are omitted.
// Each constant is listed with its lowercase name.
public enum Voice {
    ALLOY("alloy"),
    ECHO("echo"),
    FABLE("fable"),
    ONYX("onyx"),
    NOVA("nova"),
    SHIMMER("shimmer");
}

AudioResponseFormat

package org.springframework.ai.openai.api;

// Nested type: AudioResponseFormat is declared inside OpenAiAudioApi.SpeechRequest,
// so it is imported as
// org.springframework.ai.openai.api.OpenAiAudioApi.SpeechRequest.AudioResponseFormat.
// Signature listing only: the constructor and members are omitted.
// Each constant is listed with its lowercase name.
public enum AudioResponseFormat {
    MP3("mp3"),
    OPUS("opus"),
    AAC("aac"),
    FLAC("flac");
}

TextToSpeechPrompt

package org.springframework.ai.model;

/**
 * Request object for text-to-speech: the text to synthesize plus optional
 * per-request options. Signature listing only: bodies are omitted.
 *
 * NOTE(review): the shared text-to-speech types may live in
 * org.springframework.ai.audio.tts rather than org.springframework.ai.model in
 * this release; confirm the package against the library Javadoc.
 */
public class TextToSpeechPrompt {
    // Creates a prompt from text only, with no options supplied.
    public TextToSpeechPrompt(String text);
    // Creates a prompt from text together with per-request options.
    public TextToSpeechPrompt(String text, TextToSpeechOptions options);

    // Accessors for the prompt's text and options.
    public String getText();
    public TextToSpeechOptions getOptions();
}

TextToSpeechResponse

package org.springframework.ai.model;

/**
 * Response of a text-to-speech call. Signature listing only: bodies are omitted.
 */
public class TextToSpeechResponse {
    // A single Speech result; call getOutput() on it to obtain the audio bytes.
    public Speech getResult();
    // The Speech results as a list.
    public List<Speech> getResults();
    // Metadata attached to the response.
    public TextToSpeechResponseMetadata getMetadata();
}

Speech

package org.springframework.ai.model;

/**
 * A single generated speech result. Signature listing only: bodies are omitted.
 */
public class Speech {
    // The generated audio as raw bytes, suitable for writing straight to a file.
    public byte[] getOutput();
    // Metadata attached to this speech result.
    public SpeechMetadata getMetadata();
}

Best Practices

  1. Choose appropriate format: MP3 for general use, OPUS for streaming, FLAC for highest quality
  2. Select voice carefully: Test different voices to find the best match for your use case
  3. Control speed: Use speed adjustment for accessibility or time constraints
  4. Handle large texts: Break very long texts into smaller chunks for better quality
  5. Cache results: Store generated audio to avoid regenerating the same content
tessl i tessl/maven-org-springframework-ai--spring-ai-starter-model-openai@1.1.1

docs

index.md

tile.json