or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

audio-processing.mdconversational-ai.mddubbing.mdindex.mdmusic.mdrealtime.mdstudio.mdtext-to-speech.mdtranscription.mdvoices.mdworkspace.md

docs/audio-processing.md

# Audio Processing

## Audio Isolation

### audioIsolation.convert()

Remove background noise from audio.

```typescript { .api }
convert(
  request: {
    audio: File | Blob;
    fileFormat?: string; // e.g., "mp3", "wav"
  },
  options?: RequestOptions
): Promise<ReadableStream<Uint8Array>>
```

**Example:**

```typescript
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import fs from "fs";

const client = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });

const noisyAudio = fs.readFileSync("noisy.mp3");

const clean = await client.audioIsolation.convert({
  audio: noisyAudio,
  fileFormat: "mp3"
});

const output = fs.createWriteStream("clean.mp3");
for await (const chunk of clean) {
  output.write(chunk);
}
output.end();
```

## Forced Alignment

### forcedAlignment.create()

Align audio to text with precise timing for each word.

```typescript { .api }
create(
  request: {
    file: File | Blob;
    text: string;
    enabledSpooledFile?: boolean;
  },
  options?: RequestOptions
): Promise<ForcedAlignmentResponseModel>

interface ForcedAlignmentResponseModel {
  characters: string[];
  characterStartTimesSeconds: number[];
  characterEndTimesSeconds: number[];
  audioBase64?: string; // If enabledSpooledFile true
}
```

**Example:**

```typescript
const audioFile = fs.readFileSync("speech.mp3");

const alignment = await client.forcedAlignment.create({
  file: audioFile,
  text: "This is the spoken text",
  enabledSpooledFile: true
});

alignment.characters.forEach((char, i) => {
  console.log(
    `"${char}": ${alignment.characterStartTimesSeconds[i]}s - ${alignment.characterEndTimesSeconds[i]}s`
  );
});

if (alignment.audioBase64) {
  const audioBuffer = Buffer.from(alignment.audioBase64, "base64");
  fs.writeFileSync("aligned.mp3", audioBuffer);
}
```

## Speech-to-Speech

### speechToSpeech.convert()

Transform voice characteristics while preserving speech content.

```typescript { .api }
convert(
  voiceId: string,
  request: {
    audio: File | Blob;
    modelId?: string;
    voiceSettings?: string;
    seed?: number;
    removeBackgroundNoise?: boolean;
  },
  options?: RequestOptions
): Promise<ReadableStream<Uint8Array>>
```

**Example:**

```typescript
const sourceAudio = fs.readFileSync("original_voice.mp3");

const transformed = await client.speechToSpeech.convert(
  "21m00Tcm4TlvDq8ikWAM", // Target voice ID
  {
    audio: sourceAudio,
    modelId: "eleven_multilingual_v2",
    removeBackgroundNoise: true
  }
);

const output = fs.createWriteStream("transformed_voice.mp3");
for await (const chunk of transformed) {
  output.write(chunk);
}
output.end();
```

## Stream Consumption Pattern

**CRITICAL**: Do NOT use `.pipe()` with Web Streams API.

```typescript
// ❌ WRONG - Do not use .pipe()
const stream = await client.audioIsolation.convert({ audio: file });
stream.pipe(outputStream); // This will fail

// ✅ CORRECT - Use async iteration
const stream = await client.audioIsolation.convert({ audio: file });
for await (const chunk of stream) {
  outputStream.write(chunk);
}

// ✅ CORRECT - Manual reader
const stream = await client.audioIsolation.convert({ audio: file });
const reader = stream.getReader();
while (true) {
  const { done, value } = await reader.read();
  if (done) break;
  outputStream.write(value);
}
```

## Complete Examples

### Audio Cleanup Pipeline

```typescript
import { ElevenLabsClient } from "@elevenlabs/elevenlabs-js";
import fs from "fs";

const client = new ElevenLabsClient({ apiKey: process.env.ELEVENLABS_API_KEY });

// 1. Remove background noise
const noisyAudio = fs.readFileSync("recording.mp3");
const isolated = await client.audioIsolation.convert({
  audio: noisyAudio,
  fileFormat: "mp3"
});

// Save cleaned audio
const cleanFile = fs.createWriteStream("cleaned.mp3");
for await (const chunk of isolated) {
  cleanFile.write(chunk);
}
cleanFile.end();

console.log("Audio cleaned");
```

### Forced Alignment for Subtitles

```typescript
const audioFile = fs.readFileSync("speech.mp3");
const spokenText = "Hello, welcome to our presentation. Today we'll discuss...";

const alignment = await client.forcedAlignment.create({
  file: audioFile,
  text: spokenText,
  enabledSpooledFile: false
});

// Generate word-level timestamps
let currentWord = "";
let wordStart = 0;
const words: { word: string; start: number; end: number }[] = [];

alignment.characters.forEach((char, i) => {
  if (char === " " || char === "\n") {
    if (currentWord) {
      words.push({
        word: currentWord,
        start: wordStart,
        end: alignment.characterEndTimesSeconds[i - 1]
      });
      currentWord = "";
    }
  } else {
    if (!currentWord) {
      wordStart = alignment.characterStartTimesSeconds[i];
    }
    currentWord += char;
  }
});

// Add last word
if (currentWord) {
  words.push({
    word: currentWord,
    start: wordStart,
    end: alignment.characterEndTimesSeconds[alignment.characters.length - 1]
  });
}

words.forEach(w => {
  console.log(`"${w.word}": ${w.start}s - ${w.end}s`);
});
```

### Voice Transformation

```typescript
// Transform speaking style
const recording = fs.readFileSync("my_recording.mp3");

const professional = await client.speechToSpeech.convert(
  "professional_voice_id",
  {
    audio: recording,
    modelId: "eleven_multilingual_v2",
    removeBackgroundNoise: true,
    seed: 12345 // For reproducibility
  }
);

const output = fs.createWriteStream("professional_voice.mp3");
for await (const chunk of professional) {
  output.write(chunk);
}
output.end();

console.log("Voice transformed");
```

## Important Notes

- **Audio Isolation**: Removes background noise, improves voice clarity
- **Forced Alignment**: Character-level precision, useful for subtitles/karaoke
- **Speech-to-Speech**: Transforms voice while preserving content and timing
- **Stream Handling**: Never use `.pipe()` with Web Streams API
  - Use `for await...of` or manual reader
  - Web Streams API returns `ReadableStream<Uint8Array>`
- **File Formats**: Supports MP3, WAV, M4A, FLAC, etc.
- **Background Noise Removal**: Available in multiple endpoints
  - `audioIsolation.convert()`: Dedicated noise removal
  - `speechToSpeech.convert()`: Optional parameter
  - Voice cloning: Optional during voice creation