Interface for text-to-speech models that convert text into spoken audio with support for voice selection, output formats, and audio customization options.
Core type definition for text-to-speech model implementations.
/**
 * Core text-to-speech model type.
 *
 * A model is identified by its `provider` and `modelId` and exposes a single
 * operation, `doGenerate`, which turns text into audio.
 */
type SpeechModelV2 = {
  /** API specification version; always the literal `'v2'` for this type. */
  specificationVersion: 'v2';
  /** Provider identifier (e.g., 'openai', 'elevenlabs') */
  provider: string;
  /** Model identifier (e.g., 'tts-1', 'eleven_multilingual_v2') */
  modelId: string;
  /**
   * Generate speech audio from text.
   *
   * @param options - The text to speak plus optional voice, format, and other settings.
   * @returns A promise-like value that resolves with the generated audio,
   *   any warnings, and request/response details.
   */
  doGenerate(options: SpeechModelV2CallOptions): PromiseLike<SpeechModelV2Result>;
};

Configuration options for speech generation calls.
/**
 * Settings passed to a single `doGenerate` speech call.
 *
 * Only `text` is mandatory; every other member may be omitted.
 */
interface SpeechModelV2CallOptions {
  /**
   * The text that should be converted to speech.
   */
  text: string;

  /**
   * Identifier or name of the voice to use.
   */
  voice?: string;

  /**
   * Desired audio output format, e.g. 'mp3', 'wav', or 'opus'.
   */
  outputFormat?: string;

  /**
   * Extra free-form instructions for the speech generation.
   */
  instructions?: string;

  /**
   * Speed multiplier for the spoken output
   * (e.g. 0.5 for half speed, 2.0 for double speed).
   */
  speed?: number;

  /**
   * Language code of the text, e.g. 'en-US' or 'es-ES'.
   */
  language?: string;

  /**
   * Options specific to an individual provider.
   */
  providerOptions?: SpeechModelV2ProviderOptions;

  /**
   * Signal that can be used to cancel the call.
   */
  abortSignal?: AbortSignal;

  /**
   * Custom HTTP headers; a header value may be `undefined`.
   */
  headers?: { [name: string]: string | undefined };
}
/**
 * Provider-specific options for speech generation.
 *
 * A two-level map: the outer key selects an entry (the usage examples key it
 * by provider name, e.g. `openai` or `elevenlabs`), and the inner record holds
 * that entry's JSON-serializable option values.
 */
type SpeechModelV2ProviderOptions = Record<string, Record<string, JSONValue>>;

Response structure containing generated audio and metadata.
/**
 * Result from speech generation.
 *
 * `audio`, `warnings`, and `response` are always present; `request` and
 * `providerMetadata` are optional.
 */
interface SpeechModelV2Result {
  /**
   * Generated audio data as string or bytes.
   * NOTE(review): string audio is presumably base64-encoded — confirm against
   * the provider specification before decoding.
   */
  audio: string | Uint8Array;
  /** Warnings from the generation; always an array (possibly empty). */
  warnings: SpeechModelV2CallWarning[];
  /** Request details */
  request?: { body?: unknown };
  /** Response details (required) */
  response: {
    /** Timestamp associated with the response. */
    timestamp: Date;
    /** Identifier of the model reported for the response. */
    modelId: string;
    /** Response headers, when available. */
    headers?: SharedV2Headers;
    /** Response body, when available. */
    body?: unknown;
  };
  /** Provider-specific metadata, as a two-level map of JSON values. */
  providerMetadata?: Record<string, Record<string, JSONValue>>;
}

Warnings that can be returned from speech generation calls.
/**
 * Warning types for speech generation calls.
 *
 * A discriminated union on `type`:
 * - `'unsupported-setting'` names the call option (`setting`) that was not
 *   supported, with optional free-text `details`.
 * - `'other'` carries a free-text `message`.
 */
type SpeechModelV2CallWarning =
  | { type: 'unsupported-setting'; setting: keyof SpeechModelV2CallOptions; details?: string }
  | { type: 'other'; message: string };

Usage Examples:
import fs from 'fs';
import type { SpeechModelV2 } from "@ai-sdk/provider";

/**
 * Normalize generated audio to raw bytes.
 *
 * `result.audio` is `string | Uint8Array`. Per the AI SDK provider
 * specification, string audio is base64-encoded, so it must be decoded before
 * being written to a file or wrapped in a Blob; bytes are returned unchanged.
 *
 * @param audio - Audio as returned by `doGenerate`.
 * @returns The audio as a `Uint8Array`.
 */
function toAudioBytes(audio: string | Uint8Array): Uint8Array {
  if (typeof audio !== 'string') {
    return audio;
  }
  // atob yields a binary string: one character code per decoded byte.
  return Uint8Array.from(atob(audio), char => char.charCodeAt(0));
}

// Basic text-to-speech
// NOTE: `provider` is assumed to be a provider instance created elsewhere.
const model: SpeechModelV2 = provider.speechModel('tts-1');
const result = await model.doGenerate({
  text: 'Hello, welcome to our AI-powered application. How can I help you today?',
  voice: 'alloy',
  outputFormat: 'mp3'
});

// Save audio to file (Node.js)
fs.writeFileSync('output.mp3', toAudioBytes(result.audio));

// Play audio in browser
const audioBlob = new Blob([toAudioBytes(result.audio)], { type: 'audio/mpeg' });
const audioUrl = URL.createObjectURL(audioBlob);
const audio = new Audio(audioUrl);
audio.play();

// Advanced speech generation with customization
const customSpeech = await model.doGenerate({
  text: 'This is a demonstration of advanced speech synthesis capabilities.',
  voice: 'nova',
  speed: 0.9,
  language: 'en-US',
  instructions: 'Speak in a professional, calm tone with clear pronunciation',
  providerOptions: {
    openai: {
      model: 'tts-1-hd'
    }
  }
});

// Multi-language speech
const spanishSpeech = await model.doGenerate({
  text: 'Hola, bienvenido a nuestra aplicación. ¿Cómo puedo ayudarte hoy?',
  voice: 'alloy',
  language: 'es-ES',
  outputFormat: 'wav'
});

// Handle different output formats (requests run in parallel)
const formats = ['mp3', 'wav', 'opus'];
const audioFiles = await Promise.all(
  formats.map(async format => {
    const generated = await model.doGenerate({
      text: 'This is a test of different audio formats.',
      outputFormat: format
    });
    return { format, audio: generated.audio };
  })
);

// Check for warnings and handle unsupported features
const checkedResult = await model.doGenerate({
  text: 'Testing speech generation',
  voice: 'custom-voice',
  speed: 3.0, // May be unsupported
  outputFormat: 'flac' // May be unsupported
});
// `warnings` is a required array, so it can be iterated without a guard.
for (const warning of checkedResult.warnings) {
  if (warning.type === 'unsupported-setting') {
    console.warn(`Setting '${warning.setting}' is not supported by this provider`);
  } else {
    // The only other variant is 'other', which carries a free-text message.
    console.warn(warning.message);
  }
}

// Longer text with provider-specific tuning options.
// Note: doGenerate resolves with one complete result; it does not stream.
const longText = `
This is a longer piece of text that we want to convert to speech.
It contains multiple sentences and might benefit from streaming generation
to reduce latency for the user experience.
`;
const speechResult = await model.doGenerate({
  text: longText,
  voice: 'echo',
  providerOptions: {
    elevenlabs: {
      stability: 0.75,
      similarity_boost: 0.85,
      optimize_streaming_latency: 3
    }
  }
});

// Voice cloning (provider-specific)
const clonedVoice = await model.doGenerate({
  text: 'This text will be spoken in a cloned voice.',
  providerOptions: {
    elevenlabs: {
      voice_id: 'custom-voice-id-here',
      voice_settings: {
        stability: 0.71,
        similarity_boost: 0.5,
        style: 0.0,
        use_speaker_boost: true
      }
    }
  }
});