The Voice Management API provides comprehensive functionality for creating, managing, and customizing AI voices. This includes voice cloning from audio samples, managing voice settings, accessing the voice library, and professional voice cloning (PVC) capabilities.
// ElevenLabs SDK client plus the voice-related request/response types
// referenced by the examples in this file.
import {
  ElevenLabsClient,
  type AddVoiceResponseModel,
  type BodyAddVoiceV1VoicesAddPost,
  type BodyEditVoiceV1VoicesVoiceIdEditPost,
  type DeleteVoiceResponseModel,
  type EditVoiceResponseModel,
  type GetVoicesResponse,
  type GetVoicesV2Response,
  type Voice,
  type VoiceSettings,
  type VoicesGetAllRequest,
  type VoicesSearchRequest
} from 'elevenlabs';

// One shared client instance for every example below.
const client = new ElevenLabsClient();
// Enumerate every voice available to this account.
const voicesResponse = await client.voices.getAll();
console.log('Available voices:');
for (const voice of voicesResponse.voices) {
  console.log(`- ${voice.name} (${voice.voice_id})`);
  console.log(` Category: ${voice.category}`);
  console.log(` Description: ${voice.description}`);
}
// Search voices with filters and pagination
// Search voices with filters and pagination.
// FIX: the filter values now match VoicesSearchRequest declared below — the
// previous example passed "premade" for voice_type and "created_date" for
// sort, neither of which the documented interface accepts.
const searchResults = await client.voices.search({
  search: "professional",    // Text search
  voice_type: "default",     // personal | community | default | workspace | non-default
  category: "professional",  // professional | generated | premade
  language: "en",
  gender: "male",
  age: "adult",
  accent: "american",
  sort: "created_at_unix",   // created_at_unix | name
  sort_direction: "desc",
  page_size: 20,
  include_total_count: true
});
console.log(`Found ${searchResults.total_count} voices`);
searchResults.voices.forEach(voice => {
  console.log(`${voice.name}: ${voice.preview_url}`);
});

// Request shape accepted by client.voices.search().
interface VoicesSearchRequest {
  /** Text search query */
  search?: string;
  /** Voice type filter */
  voice_type?: "personal" | "community" | "default" | "workspace" | "non-default";
  /** Voice category */
  category?: "professional" | "generated" | "premade";
  /** Language filter (ISO 639-1) */
  language?: string;
  /** Gender filter */
  gender?: "male" | "female";
  /** Age category */
  age?: "young" | "adult" | "senior";
  /** Accent/region */
  accent?: string;
  /** Sort order */
  sort?: "created_at_unix" | "name";
  /** Sort direction */
  sort_direction?: "asc" | "desc";
  /** Results per page (max 100) */
  page_size?: number;
  /** Pagination cursor - use instead of start_after_voice_id for newer API versions */
  next_page_token?: string;
  /** Pagination cursor (legacy) */
  start_after_voice_id?: string;
  /** Filter by fine-tuning state for professional voices */
  fine_tuning_state?: string;
  /** Filter voices by collection ID */
  collection_id?: string;
  /** Include total count in response */
  include_total_count?: boolean;
}
// Get detailed voice information
// Fetch a single voice by ID, including its current generation settings.
const voiceDetails = await client.voices.get(
  "21m00Tcm4TlvDq8ikWAM", // Voice ID
  { with_settings: true } // Include voice settings in the response
);
console.log('Voice details:', {
  id: voiceDetails.voice_id,
  name: voiceDetails.name,
  description: voiceDetails.description,
  category: voiceDetails.category,
  settings: voiceDetails.settings,
  samples: voiceDetails.samples?.length,
  languages: voiceDetails.verified_languages
});

import * as fs from 'fs';
// Clone a new voice from local audio samples.
const cloneLabels = JSON.stringify({
  accent: "american",
  age: "adult",
  gender: "female",
  use_case: "narration"
});
const newVoice = await client.voices.add({
  name: "My Custom Voice",
  files: [
    fs.createReadStream('voice_sample1.mp3'),
    fs.createReadStream('voice_sample2.mp3'),
    fs.createReadStream('voice_sample3.wav')
  ],
  description: "A custom voice cloned from high-quality samples",
  remove_background_noise: true,
  labels: cloneLabels
});
console.log(`Created voice: ${newVoice.voice_id}`);

// Request body accepted by client.voices.add().
interface BodyAddVoiceV1VoicesAddPost {
  /** Voice name (shown in interface) */
  name: string;
  /** Audio files for cloning (2-25 recommended) */
  files: File[] | fs.ReadStream[] | Blob[];
  /** Remove background noise from samples */
  remove_background_noise?: boolean;
  /** Voice description */
  description?: string;
  /** Serialized JSON labels for categorization */
  labels?: string;
}
// Optimal voice cloning setup
// A cloning setup tuned for quality: several studio-grade samples plus
// descriptive labels for later discovery.
const highQualityVoice = await client.voices.add({
  name: "Professional Speaker Voice",
  files: [
    // 3-5 high-quality samples, 30 seconds each
    fs.createReadStream('sample1_30sec.wav'),
    fs.createReadStream('sample2_30sec.wav'),
    fs.createReadStream('sample3_30sec.wav')
  ],
  remove_background_noise: true,
  description: "Professional speaker with clear articulation",
  labels: JSON.stringify({
    quality: "high",
    speaker_type: "professional",
    recording_environment: "studio",
    accent: "neutral",
    gender: "male",
    age: "adult"
  })
});

// Update an existing voice's metadata and append another sample.
const updatedVoice = await client.voices.edit("voice_id_here", {
  name: "Updated Voice Name",
  description: "Enhanced voice description",
  files: [fs.createReadStream('additional_sample.mp3')],
  labels: JSON.stringify({
    version: "2.0",
    updated: new Date().toISOString()
  })
});
// Get current voice settings
// Read the current generation settings for a voice.
const voiceSettings = await client.voices.getSettings("voice_id_here");
console.log('Current settings:', voiceSettings);

// Overwrite a voice's generation settings.
const newSettings = await client.voices.editSettings("voice_id_here", {
  stability: 0.7,          // 0.0-1.0 (higher = more stable)
  similarity_boost: 0.8,   // 0.0-1.0 (higher = more similar)
  style: 0.3,              // 0.0-1.0 (style exaggeration)
  use_speaker_boost: true, // Enhanced similarity
  speed: 1.1               // Speech speed multiplier
});
console.log('Updated settings:', newSettings);

// Read the system-wide default voice settings.
const defaultSettings = await client.voices.getDefaultSettings();
console.log('Default voice settings:', defaultSettings);

// Settings accepted by editSettings() and returned by getSettings().
interface VoiceSettings {
  /**
   * Voice stability (0.0-1.0).
   * Lower: more emotional range, higher variability.
   * Higher: more consistent, less emotional range.
   */
  stability?: number;
  /**
   * Similarity boost (0.0-1.0):
   * how closely the AI follows original voice characteristics.
   */
  similarity_boost?: number;
  /**
   * Style exaggeration (0.0-1.0).
   * Amplifies the original speaker's style characteristics;
   * increases computational cost and latency.
   */
  style?: number;
  /**
   * Enhanced speaker similarity.
   * Improves voice matching at the cost of computational resources.
   */
  use_speaker_boost?: boolean;
  /**
   * Speech speed multiplier.
   * 1.0 = normal speed, <1.0 = slower, >1.0 = faster.
   */
  speed?: number;
}
// Access Professional Voice Cloning features
// Professional Voice Cloning (PVC) namespace of the voices API.
const pvcClient = client.voices.pvc;

// Kick off a high-quality professional voice generation.
const professionalVoice = await pvcClient.createVoiceGeneration({
  voice_name: "Executive Voice",
  voice_description: "Professional executive speaker",
  text: "Sample text for voice generation training",
  accept_terms: true
});
console.log('PVC Generation ID:', professionalVoice.generation_id);

// Poll the PVC training job for its state and progress.
const generationStatus = await pvcClient.getVoiceGeneration(
  professionalVoice.generation_id
);
console.log('Training status:', generationStatus.state);
console.log('Progress:', generationStatus.progress_percentage);
if (generationStatus.state === "completed") {
  const generatedVoiceId = generationStatus.generated_voice_id;
  console.log('Generated voice ID:', generatedVoiceId);
}

// Browse the community/shared voice library with filters.
const libraryVoices = await client.voices.getShared({
  category: "professional",
  language: "en",
  gender: "female",
  page_size: 50
});
console.log('Library voices:', libraryVoices.voices.length);
// Add a shared voice to your collection
const addedSharedVoice = await client.voices.addSharingVoice(
"public_user_id",
"shared_voice_id",
{
new_name: "My Copy of Shared Voice"
}
);
console.log('Added shared voice:', addedSharedVoice.voice_id);import * as fs from 'fs';
// Find library voices that sound similar to a reference recording.
const similarVoices = await client.voices.getSimilarLibraryVoices({
  audio_file: fs.createReadStream('reference_audio.mp3'),
  similarity_threshold: 0.8, // 0-2; smaller = stricter match (optional)
  top_k: 10                  // How many matches to return (optional, 1-100)
});
console.log('Similar voices found:');
for (const voice of similarVoices.voices) {
  console.log(`- ${voice.name}: ${voice.similarity_score}% match`);
}

// Request shape for getSimilarLibraryVoices().
interface GetSimilarLibraryVoicesRequest {
  /** Audio file for voice similarity matching */
  audio_file?: File | fs.ReadStream | Blob;
  /** Threshold for voice similarity between provided sample and library voices.
   * Values range from 0 to 2. The smaller the value the more similar voices will be returned. */
  similarity_threshold?: number;
  /** Number of most similar voices to return. If similarity_threshold is provided,
   * less than this number of voices may be returned. Values range from 1 to 100. */
  top_k?: number;
}
// Delete a custom voice
// Permanently delete a custom voice.
const deleteResult = await client.voices.delete("voice_id_to_delete");
console.log('Voice deleted:', deleteResult.success);

// FIX: the lookups below each redeclared `const voiceDetails` (already
// declared earlier in this file), which is a duplicate-declaration error in a
// single module; they now use distinct names.

// Check voice verification status.
const verifiedVoice = await client.voices.get("voice_id");
if (verifiedVoice.voice_verification) {
  console.log('Verification status:', verifiedVoice.voice_verification.requires_verification);
  console.log('Verification attempts:', verifiedVoice.voice_verification.attempts_count);
}

// Get voice with detailed sample information.
const voiceWithSamples = await client.voices.get(voiceId, {
  with_settings: true
});
console.log('Voice samples:');
voiceWithSamples.samples?.forEach((sample, index) => {
  console.log(`Sample ${index + 1}:`);
  console.log(` - File name: ${sample.file_name}`);
  console.log(` - Duration: ${sample.duration_seconds}s`);
  console.log(` - Hash: ${sample.hash}`);
});

// Comprehensive voice information.
const voiceInfo = await client.voices.get(voiceId);
console.log('Voice metadata:', {
  id: voiceInfo.voice_id,
  name: voiceInfo.name,
  category: voiceInfo.category,
  isOwner: voiceInfo.is_owner,
  isLegacy: voiceInfo.is_legacy,
  createdAt: new Date(voiceInfo.created_at_unix * 1000),
  availableTiers: voiceInfo.available_for_tiers,
  verifiedLanguages: voiceInfo.verified_languages?.map(lang => lang.language_code),
  labels: voiceInfo.labels
});

// Access fine-tuning data for professional voices.
const tunableVoice = await client.voices.get(voiceId);
if (tunableVoice.fine_tuning) {
  console.log('Fine-tuning status:', tunableVoice.fine_tuning.is_allowed_to_fine_tune);
  console.log('Fine-tuning state:', tunableVoice.fine_tuning.state);
  console.log('Model ID:', tunableVoice.fine_tuning.model_id);
}

// Check voice safety controls.
const safetyVoice = await client.voices.get(voiceId);
if (safetyVoice.safety_control) {
  console.log('Safety control:', safetyVoice.safety_control);
  // Handle safety restrictions if any
}

// Inspect voice sharing configuration.
const sharedVoiceInfo = await client.voices.get(voiceId);
if (sharedVoiceInfo.sharing) {
  console.log('Sharing status:', sharedVoiceInfo.sharing.status);
  console.log('Public owner ID:', sharedVoiceInfo.sharing.public_owner_id);
  console.log('Original voice ID:', sharedVoiceInfo.sharing.original_voice_id);
}

import { ElevenLabsError, ElevenLabsTimeoutError } from 'elevenlabs';
// Handle the common failure modes of voice creation.
try {
  const voice = await client.voices.add({
    name: "Test Voice",
    files: [fs.createReadStream('sample.mp3')]
  });
  console.log('Voice created:', voice.voice_id);
} catch (error) {
  if (error instanceof ElevenLabsError) {
    console.error('Voice creation error:', error.statusCode);
    switch (error.statusCode) {
      case 400:
        console.error('Invalid audio files or parameters');
        break;
      case 401:
        console.error('Authentication error');
        break;
      case 403:
        console.error('Insufficient permissions or quota exceeded');
        break;
      case 422:
        console.error('Validation error:', error.body);
        break;
    }
  }
}
// Best practices for voice cloning samples
// Sample-selection guidelines applied to a cloning request.
const bestPracticeVoice = await client.voices.add({
  name: "High Quality Voice",
  files: [
    // For optimal results:
    //  - 3-5 samples minimum (up to 25 maximum)
    //  - each sample 30 seconds to 2 minutes
    //  - clear, consistent speaker
    //  - minimal background noise
    //  - consistent audio quality
    //  - various emotional expressions
    fs.createReadStream('clear_speech_sample1.wav'), // 30-60 seconds
    fs.createReadStream('emotional_sample2.wav'),    // Different emotion
    fs.createReadStream('narrative_sample3.wav'),    // Different style
  ],
  remove_background_noise: true,
  description: "High-quality voice with multiple emotional expressions"
});
// Recommended settings for different use cases
// Preset generation-settings profiles for common use cases,
// defined individually and grouped for lookup by name.

// Natural conversation
const conversationalProfile = {
  stability: 0.4,
  similarity_boost: 0.7,
  style: 0.1,
  use_speaker_boost: false,
  speed: 1.0
};

// Professional narration
const professionalProfile = {
  stability: 0.7,
  similarity_boost: 0.8,
  style: 0.2,
  use_speaker_boost: true,
  speed: 0.95
};

// Expressive storytelling
const expressiveProfile = {
  stability: 0.3,
  similarity_boost: 0.6,
  style: 0.4,
  use_speaker_boost: false,
  speed: 1.1
};

const settingsProfiles = {
  conversational: conversationalProfile,
  professional: professionalProfile,
  expressive: expressiveProfile
};

// Apply one of the profiles to a voice.
await client.voices.editSettings(voiceId, settingsProfiles.professional);
// Efficient voice discovery
// Fetch independent voice data in parallel.
// FIX: the destructured names previously reused `searchResults` and
// `defaultSettings`, both already declared earlier in this file (a duplicate
// const declaration error in a single module); renamed to unique identifiers.
// Also, "premade" is not a documented voice_type value — "default" selects
// the premade/default voices.
const [allVoices, premadeSearch, systemDefaultSettings] = await Promise.all([
  client.voices.getAll({ show_legacy: false }),
  client.voices.search({ voice_type: "default", page_size: 50 }),
  client.voices.getDefaultSettings()
]);
console.log(`Total voices: ${allVoices.voices.length}`);
console.log(`Premade voices: ${premadeSearch.voices.length}`);
// Cache frequently used voice data
class VoiceCache {
private cache = new Map<string, Voice>();
async getVoice(client: ElevenLabsClient, voiceId: string): Promise<Voice> {
if (!this.cache.has(voiceId)) {
const voice = await client.voices.get(voiceId, { with_settings: true });
this.cache.set(voiceId, voice);
}
return this.cache.get(voiceId)!;
}
clearCache() {
this.cache.clear();
}
}
const voiceCache = new VoiceCache();remove_background_noise for non-studio recordings