or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md · datasets.md · index.md · media.md · openai-integration.md · prompts.md · public-api.md · tracing.md

docs/media.md

# Media Handling

Comprehensive media handling system for images, PDFs, and other binary content. Langfuse automatically detects media in trace inputs/outputs, uploads to cloud storage, and replaces content with reference strings for efficient storage and retrieval.

## Capabilities

### LangfuseMedia Class

Wrapper class for media objects that handles upload and reference generation.

```typescript { .api }
class LangfuseMedia {
  /** Optional reference object */
  obj?: object;
  /** Length of content in bytes */
  contentLength: number | undefined;
  /** SHA256 hash of content */
  contentSha256Hash: string | undefined;

  /**
   * Creates a media object for upload
   * @param params - Media source (one of: obj, base64DataUri, contentBytes, filePath)
   */
  constructor(params: {
    /** Optional reference object */
    obj?: object;
    /** Base64 data URI (e.g., data:image/png;base64,...) */
    base64DataUri?: string;
    /** Content type */
    contentType?: MediaContentType;
    /** Raw content bytes */
    contentBytes?: Buffer;
    /** File path to read from */
    filePath?: string;
  });

  /**
   * Returns a media reference string for storage
   * Format: @@@langfuseMedia:type={contentType}|id={mediaId}|source={source}@@@
   * @returns Reference string or undefined
   */
  toJSON(): string | undefined;

  /**
   * Parses a media reference string into its components
   * @param referenceString - Reference string to parse
   * @returns Parsed media reference
   */
  static parseReferenceString(referenceString: string): ParsedMediaReference;

  /**
   * Recursively traverses an object and replaces all media reference strings
   * with actual base64 data URIs
   * @param params - Resolution parameters
   * @returns Object with resolved media content
   */
  static resolveMediaReferences<T>(
    params: LangfuseMediaResolveMediaReferencesParams<T>
  ): Promise<T>;
}

interface ParsedMediaReference {
  /** Media ID */
  mediaId: string;
  /** Source identifier */
  source: string;
  /** Content type */
  contentType: MediaContentType;
}

interface LangfuseMediaResolveMediaReferencesParams<T> {
  /** Object to process */
  obj: T;
  /** Langfuse client for fetching media */
  langfuseClient: LangfuseCore;
  /** Resolution format (currently only "base64DataUri" supported) */
  resolveWith: "base64DataUri";
  /** Maximum traversal depth (default: 10) */
  maxDepth?: number;
}

type MediaContentType =
  // Images
  | "image/png"
  | "image/jpeg"
  | "image/jpg"
  | "image/webp"
  | "image/gif"
  | "image/svg+xml"
  | "image/tiff"
  | "image/bmp"
  // Audio
  | "audio/mpeg"
  | "audio/mp3"
  | "audio/wav"
  | "audio/ogg"
  | "audio/oga"
  | "audio/aac"
  | "audio/mp4"
  | "audio/flac"
  // Video
  | "video/mp4"
  | "video/webm"
  // Text
  | "text/plain"
  | "text/html"
  | "text/css"
  | "text/csv"
  // Application
  | "application/pdf"
  | "application/msword"
  | "application/vnd.ms-excel"
  | "application/zip"
  | "application/json"
  | "application/xml"
  | "application/octet-stream";
```

**Usage Example:**

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Create media from base64 data URI
const image = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUg...'
});

// Create media from file
const pdf = new LangfuseMedia({
  filePath: '/path/to/document.pdf',
  contentType: 'application/pdf'
});

// Create media from bytes
const audio = new LangfuseMedia({
  contentBytes: audioBuffer,
  contentType: 'audio/mpeg'
});

// Use in trace
const trace = langfuse.trace({
  name: 'image-analysis',
  input: {
    image: image,
    prompt: 'What is in this image?'
  }
});

// Media is automatically uploaded and replaced with reference
await langfuse.flushAsync();
```

### Automatic Media Detection

Langfuse automatically detects and handles base64 data URIs in trace inputs and outputs.

```typescript
import { Langfuse } from 'langfuse';

const langfuse = new Langfuse();

// Base64 data URIs are automatically detected
const trace = langfuse.trace({
  name: 'vision-analysis',
  input: {
    // This will be automatically uploaded and replaced
    image: 'data:image/jpeg;base64,/9j/4AAQSkZJRg...',
    question: 'Describe this image'
  }
});

const generation = trace.generation({
  name: 'gpt-4-vision',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        {
          type: 'image_url',
          // Automatically detected and uploaded
          image_url: { url: 'data:image/png;base64,iVBORw0KGg...' }
        }
      ]
    }
  ]
});

generation.end({
  output: {
    description: 'A beautiful sunset over mountains'
  }
});

await langfuse.flushAsync();
```

### Fetching Media

Retrieve media metadata and content.

```typescript { .api }
class Langfuse {
  /**
   * Fetches media metadata
   * @param id - Media ID
   * @returns Media metadata with download URL
   */
  fetchMedia(id: string): Promise<GetMediaResponse>;

  /**
   * Recursively replaces media reference strings in an object with actual content
   * @param params - Resolution parameters
   * @returns Object with resolved media content
   */
  resolveMediaReferences<T>(params: {
    obj: T;
    resolveWith: "base64DataUri";
    maxDepth?: number;
  }): Promise<T>;
}

interface GetMediaResponse {
  /** Media ID */
  mediaId: string;
  /** Download URL (temporary, expires) */
  url: string;
  /** Content type */
  contentType: MediaContentType;
  /** Size in bytes */
  contentLength: number;
  /** Upload timestamp */
  uploadedAt: string;
}
```

**Usage Example:**

```typescript
// Fetch media metadata
const mediaInfo = await langfuse.fetchMedia('media-id-123');

console.log(mediaInfo.contentType); // "image/png"
console.log(mediaInfo.contentLength); // 12345
console.log(mediaInfo.url); // Temporary download URL

// Download the media
const response = await fetch(mediaInfo.url);
const buffer = await response.arrayBuffer();

// Resolve media references in a trace
const trace = await langfuse.fetchTrace('trace-id-123');

// Trace input may contain media references like:
// @@@langfuseMedia:type=image/png|id=media-123|source=bytes@@@

// Resolve them to base64 data URIs
const resolvedTrace = await langfuse.resolveMediaReferences({
  obj: trace.data,
  resolveWith: 'base64DataUri',
  maxDepth: 10
});

// Now resolvedTrace contains actual base64 data URIs
console.log(resolvedTrace.input.image); // "data:image/png;base64,..."
```

### Resolving Media References

Replace media reference strings with actual content throughout nested objects.

```typescript { .api }
/**
 * Static method for resolving media references without a Langfuse instance
 */
LangfuseMedia.resolveMediaReferences<T>(
  params: LangfuseMediaResolveMediaReferencesParams<T>
): Promise<T>;
```

**Usage Example:**

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Fetch traces with media references
const traces = await langfuse.fetchTraces({
  limit: 10
});

// Resolve all media references in all traces
for (const trace of traces.data) {
  const resolved = await LangfuseMedia.resolveMediaReferences({
    obj: trace,
    langfuseClient: langfuse,
    resolveWith: 'base64DataUri',
    maxDepth: 5
  });

  // Now trace.input and trace.output contain actual media content
  console.log(resolved.input);
}
```

## Media Reference Format

Langfuse uses a special string format for media references:

```
@@@langfuseMedia:type={contentType}|id={mediaId}|source={source}@@@
```

**Example:**
```
@@@langfuseMedia:type=image/png|id=550e8400-e29b-41d4-a716-446655440000|source=bytes@@@
```

**Components:**
- `type`: Media content type (MIME type)
- `id`: Unique media identifier (UUID)
- `source`: Source of the media (bytes, base64, file, etc.)

This format allows efficient storage and retrieval while maintaining references to the actual media content stored in cloud storage.

## Usage Patterns

### Vision Models

Handle image inputs for vision models.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Read image from file
const image = new LangfuseMedia({
  filePath: './screenshot.png',
  contentType: 'image/png'
});

const trace = langfuse.trace({
  name: 'screenshot-analysis',
  input: {
    image: image,
    task: 'Extract text from screenshot'
  }
});

const generation = trace.generation({
  name: 'gpt-4-vision',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Extract all text from this image' },
        {
          type: 'image_url',
          image_url: {
            url: image.toJSON() || '', // Use reference
            detail: 'high'
          }
        }
      ]
    }
  ]
});

// ... get response from model ...

generation.end({
  output: { extracted_text: '...' }
});

await langfuse.flushAsync();
```

### PDF Processing

Handle PDF documents in traces.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';
import fs from 'fs';

const langfuse = new Langfuse();

// Read PDF file
const pdfBuffer = fs.readFileSync('./document.pdf');

const pdf = new LangfuseMedia({
  contentBytes: pdfBuffer,
  contentType: 'application/pdf'
});

const trace = langfuse.trace({
  name: 'pdf-analysis',
  input: {
    document: pdf,
    analysis_type: 'summary'
  }
});

const generation = trace.generation({
  name: 'document-summary',
  model: 'gpt-4',
  input: 'Summarize the PDF document',
  metadata: {
    document_size: pdfBuffer.length
  }
});

// ... process PDF and generate summary ...

generation.end({
  output: { summary: '...' }
});

await langfuse.flushAsync();
```

### Audio Processing

Handle audio files for speech-to-text or analysis.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

const audio = new LangfuseMedia({
  filePath: './recording.mp3',
  contentType: 'audio/mpeg'
});

const trace = langfuse.trace({
  name: 'speech-to-text',
  input: {
    audio: audio,
    language: 'en'
  }
});

const generation = trace.generation({
  name: 'whisper-transcription',
  model: 'whisper-1',
  input: audio
});

generation.end({
  output: { text: 'Transcribed text...' }
});

await langfuse.flushAsync();
```

### Multiple Media Items

Handle traces with multiple media items.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

const image1 = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,...'
});

const image2 = new LangfuseMedia({
  base64DataUri: 'data:image/jpeg;base64,...'
});

const trace = langfuse.trace({
  name: 'multi-image-comparison',
  input: {
    images: [image1, image2],
    task: 'Compare these images'
  }
});

const generation = trace.generation({
  name: 'image-comparison',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What are the differences?' },
        { type: 'image_url', image_url: { url: image1.toJSON() || '' } },
        { type: 'image_url', image_url: { url: image2.toJSON() || '' } }
      ]
    }
  ]
});

generation.end({
  output: { differences: ['...'] }
});

await langfuse.flushAsync();
```

### Retrieving Media for Analysis

Fetch traces and resolve media for local analysis.

```typescript
import { Langfuse } from 'langfuse';
import fs from 'fs';

const langfuse = new Langfuse();

// Fetch traces with media
const traces = await langfuse.fetchTraces({
  name: 'vision-analysis',
  limit: 10
});

for (const trace of traces.data) {
  // Resolve media references to base64
  const resolved = await langfuse.resolveMediaReferences({
    obj: trace,
    resolveWith: 'base64DataUri'
  });

  // Extract image from base64 data URI
  if (resolved.input?.image) {
    const base64Data = resolved.input.image.split(',')[1];
    const buffer = Buffer.from(base64Data, 'base64');

    // Save to file for analysis
    fs.writeFileSync(`./trace-${trace.id}.png`, buffer);
  }
}
```

## Content Type Support

Langfuse supports 50+ MIME types including:

**Images:**
- `image/jpeg`, `image/png`, `image/gif`, `image/webp`, `image/svg+xml`, `image/bmp`, `image/tiff`, `image/heic`, `image/heif`, `image/avif`

**Documents:**
- `application/pdf`, `text/plain`, `text/html`, `text/csv`, `text/markdown`

**Audio:**
- `audio/mpeg`, `audio/wav`, `audio/ogg`, `audio/mp4`, `audio/webm`, `audio/flac`, `audio/aac`

**Video:**
- `video/mp4`, `video/webm`, `video/ogg`, `video/mpeg`, `video/quicktime`, `video/x-msvideo`

**Archives:**
- `application/zip`, `application/gzip`, `application/x-tar`, `application/x-7z-compressed`

**Data:**
- `application/json`, `application/xml`, `application/octet-stream`

## Complete Media Handling Example

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';
import OpenAI from 'openai';
import fs from 'fs';

const langfuse = new Langfuse();
const openai = new OpenAI();

// Step 1: Create media objects
const userImage = new LangfuseMedia({
  filePath: './uploads/user-photo.jpg',
  contentType: 'image/jpeg'
});

const referenceImage = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,iVBORw0KGg...'
});

// Step 2: Create trace with media
const trace = langfuse.trace({
  name: 'image-comparison-workflow',
  userId: 'user-123',
  input: {
    user_image: userImage,
    reference_image: referenceImage,
    task: 'Compare image similarity'
  }
});

// Step 3: Use with OpenAI Vision
const generation = trace.generation({
  name: 'vision-comparison',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Compare these two images and describe their similarities and differences'
        },
        {
          type: 'image_url',
          image_url: {
            url: userImage.toJSON() || '',
            detail: 'high'
          }
        },
        {
          type: 'image_url',
          image_url: {
            url: referenceImage.toJSON() || '',
            detail: 'high'
          }
        }
      ]
    }
  ]
});

// Make actual API call
const response = await openai.chat.completions.create({
  model: 'gpt-4-vision-preview',
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Compare these images'
        },
        {
          type: 'image_url',
          image_url: { url: userImage.toJSON() || '' }
        },
        {
          type: 'image_url',
          image_url: { url: referenceImage.toJSON() || '' }
        }
      ]
    }
  ],
  max_tokens: 500
});

generation.end({
  output: response.choices[0].message,
  usage: {
    input: response.usage?.prompt_tokens,
    output: response.usage?.completion_tokens,
    total: response.usage?.total_tokens
  }
});

trace.update({
  output: {
    comparison: response.choices[0].message.content
  }
});

// Step 4: Flush to upload media
await langfuse.flushAsync();

// Step 5: Later, retrieve and resolve media
const fetchedTrace = await langfuse.fetchTrace(trace.id);

const resolvedTrace = await langfuse.resolveMediaReferences({
  obj: fetchedTrace.data,
  resolveWith: 'base64DataUri'
});

// Save resolved images
if (resolvedTrace.input?.user_image) {
  const base64 = resolvedTrace.input.user_image.split(',')[1];
  fs.writeFileSync('./resolved-user-image.jpg', Buffer.from(base64, 'base64'));
}

console.log('Trace URL:', trace.getTraceUrl());
```