# Media Handling

Comprehensive media handling system for images, PDFs, and other binary content. Langfuse automatically detects media in trace inputs/outputs, uploads to cloud storage, and replaces content with reference strings for efficient storage and retrieval.

## Capabilities

### LangfuseMedia Class

Wrapper class for media objects that handles upload and reference generation.

```typescript { .api }
class LangfuseMedia {
  /** Optional reference object */
  obj?: object;
  /** Length of content in bytes */
  contentLength: number | undefined;
  /** SHA256 hash of content */
  contentSha256Hash: string | undefined;

  /**
   * Creates a media object for upload
   * @param params - Media source (one of: obj, base64DataUri, contentBytes, filePath)
   */
  constructor(params: {
    /** Optional reference object */
    obj?: object;
    /** Base64 data URI (e.g., data:image/png;base64,...) */
    base64DataUri?: string;
    /** Content type */
    contentType?: MediaContentType;
    /** Raw content bytes */
    contentBytes?: Buffer;
    /** File path to read from */
    filePath?: string;
  });

  /**
   * Returns a media reference string for storage
   * Format: @@@langfuseMedia:type={contentType}|id={mediaId}|source={source}@@@
   * @returns Reference string or undefined
   */
  toJSON(): string | undefined;

  /**
   * Parses a media reference string into its components
   * @param referenceString - Reference string to parse
   * @returns Parsed media reference
   */
  static parseReferenceString(referenceString: string): ParsedMediaReference;

  /**
   * Recursively traverses an object and replaces all media reference strings
   * with actual base64 data URIs
   * @param params - Resolution parameters
   * @returns Object with resolved media content
   */
  static resolveMediaReferences<T>(
    params: LangfuseMediaResolveMediaReferencesParams<T>
  ): Promise<T>;
}

interface ParsedMediaReference {
  /** Media ID */
  mediaId: string;
  /** Source identifier */
  source: string;
  /** Content type */
  contentType: MediaContentType;
}

interface LangfuseMediaResolveMediaReferencesParams<T> {
  /** Object to process */
  obj: T;
  /** Langfuse client for fetching media */
  langfuseClient: LangfuseCore;
  /** Resolution format (currently only "base64DataUri" supported) */
  resolveWith: "base64DataUri";
  /** Maximum traversal depth (default: 10) */
  maxDepth?: number;
}

type MediaContentType =
  // Images
  | "image/png"
  | "image/jpeg"
  | "image/jpg"
  | "image/webp"
  | "image/gif"
  | "image/svg+xml"
  | "image/tiff"
  | "image/bmp"
  // Audio
  | "audio/mpeg"
  | "audio/mp3"
  | "audio/wav"
  | "audio/ogg"
  | "audio/oga"
  | "audio/aac"
  | "audio/mp4"
  | "audio/flac"
  // Video
  | "video/mp4"
  | "video/webm"
  // Text
  | "text/plain"
  | "text/html"
  | "text/css"
  | "text/csv"
  // Application
  | "application/pdf"
  | "application/msword"
  | "application/vnd.ms-excel"
  | "application/zip"
  | "application/json"
  | "application/xml"
  | "application/octet-stream";
```

**Usage Example:**

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Create media from base64 data URI
const image = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,iVBORw0KGgoAAAANSUhEUg...'
});

// Create media from file
const pdf = new LangfuseMedia({
  filePath: '/path/to/document.pdf',
  contentType: 'application/pdf'
});

// Create media from bytes
const audio = new LangfuseMedia({
  contentBytes: audioBuffer,
  contentType: 'audio/mpeg'
});

// Use in trace
const trace = langfuse.trace({
  name: 'image-analysis',
  input: {
    image: image,
    prompt: 'What is in this image?'
  }
});

// Media is automatically uploaded and replaced with reference
await langfuse.flushAsync();
```

### Automatic Media Detection

Langfuse automatically detects and handles base64 data URIs in trace inputs and outputs.

```typescript
import { Langfuse } from 'langfuse';

const langfuse = new Langfuse();

// Base64 data URIs are automatically detected
const trace = langfuse.trace({
  name: 'vision-analysis',
  input: {
    // This will be automatically uploaded and replaced
    image: 'data:image/jpeg;base64,/9j/4AAQSkZJRg...',
    question: 'Describe this image'
  }
});

const generation = trace.generation({
  name: 'gpt-4-vision',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What is in this image?' },
        {
          type: 'image_url',
          // Automatically detected and uploaded
          image_url: { url: 'data:image/png;base64,iVBORw0KGg...' }
        }
      ]
    }
  ]
});

generation.end({
  output: {
    description: 'A beautiful sunset over mountains'
  }
});

await langfuse.flushAsync();
```

### Fetching Media

Retrieve media metadata and content.

```typescript { .api }
class Langfuse {
  /**
   * Fetches media metadata
   * @param id - Media ID
   * @returns Media metadata with download URL
   */
  fetchMedia(id: string): Promise<GetMediaResponse>;

  /**
   * Recursively replaces media reference strings in an object with actual content
   * @param params - Resolution parameters
   * @returns Object with resolved media content
   */
  resolveMediaReferences<T>(params: {
    obj: T;
    resolveWith: "base64DataUri";
    maxDepth?: number;
  }): Promise<T>;
}

interface GetMediaResponse {
  /** Media ID */
  mediaId: string;
  /** Download URL (temporary, expires) */
  url: string;
  /** Content type */
  contentType: MediaContentType;
  /** Size in bytes */
  contentLength: number;
  /** Upload timestamp */
  uploadedAt: string;
}
```

**Usage Example:**

```typescript
// Fetch media metadata
const mediaInfo = await langfuse.fetchMedia('media-id-123');

console.log(mediaInfo.contentType); // "image/png"
console.log(mediaInfo.contentLength); // 12345
console.log(mediaInfo.url); // Temporary download URL

// Download the media
const response = await fetch(mediaInfo.url);
const buffer = await response.arrayBuffer();

// Resolve media references in a trace
const trace = await langfuse.fetchTrace('trace-id-123');

// Trace input may contain media references like:
// @@@langfuseMedia:type=image/png|id=media-123|source=bytes@@@

// Resolve them to base64 data URIs
const resolvedTrace = await langfuse.resolveMediaReferences({
  obj: trace.data,
  resolveWith: 'base64DataUri',
  maxDepth: 10
});

// Now resolvedTrace contains actual base64 data URIs
console.log(resolvedTrace.input.image); // "data:image/png;base64,..."
```

### Resolving Media References

Replace media reference strings with actual content throughout nested objects.

```typescript { .api }
/**
 * Static method for resolving media references without a Langfuse instance
 */
LangfuseMedia.resolveMediaReferences<T>(
  params: LangfuseMediaResolveMediaReferencesParams<T>
): Promise<T>;
```

**Usage Example:**

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Fetch traces with media references
const traces = await langfuse.fetchTraces({
  limit: 10
});

// Resolve all media references in all traces
for (const trace of traces.data) {
  const resolved = await LangfuseMedia.resolveMediaReferences({
    obj: trace,
    langfuseClient: langfuse,
    resolveWith: 'base64DataUri',
    maxDepth: 5
  });

  // Now resolved.input and resolved.output contain actual media content
  console.log(resolved.input);
}
```

## Media Reference Format

Langfuse uses a special string format for media references:

```
@@@langfuseMedia:type={contentType}|id={mediaId}|source={source}@@@
```

**Example:**
```
@@@langfuseMedia:type=image/png|id=550e8400-e29b-41d4-a716-446655440000|source=bytes@@@
```

**Components:**
- `type`: Media content type (MIME type)
- `id`: Unique media identifier (UUID)
- `source`: Source of the media (bytes, base64, file, etc.)

This format allows efficient storage and retrieval while maintaining references to the actual media content stored in cloud storage.

## Usage Patterns

### Vision Models

Handle image inputs for vision models.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

// Read image from file
const image = new LangfuseMedia({
  filePath: './screenshot.png',
  contentType: 'image/png'
});

const trace = langfuse.trace({
  name: 'screenshot-analysis',
  input: {
    image: image,
    task: 'Extract text from screenshot'
  }
});

const generation = trace.generation({
  name: 'gpt-4-vision',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'Extract all text from this image' },
        {
          type: 'image_url',
          image_url: {
            url: image.toJSON() || '', // Use reference
            detail: 'high'
          }
        }
      ]
    }
  ]
});

// ... get response from model ...

generation.end({
  output: { extracted_text: '...' }
});

await langfuse.flushAsync();
```

### PDF Processing

Handle PDF documents in traces.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';
import fs from 'fs';

const langfuse = new Langfuse();

// Read PDF file
const pdfBuffer = fs.readFileSync('./document.pdf');

const pdf = new LangfuseMedia({
  contentBytes: pdfBuffer,
  contentType: 'application/pdf'
});

const trace = langfuse.trace({
  name: 'pdf-analysis',
  input: {
    document: pdf,
    analysis_type: 'summary'
  }
});

const generation = trace.generation({
  name: 'document-summary',
  model: 'gpt-4',
  input: 'Summarize the PDF document',
  metadata: {
    document_size: pdfBuffer.length
  }
});

// ... process PDF and generate summary ...

generation.end({
  output: { summary: '...' }
});

await langfuse.flushAsync();
```

### Audio Processing

Handle audio files for speech-to-text or analysis.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

const audio = new LangfuseMedia({
  filePath: './recording.mp3',
  contentType: 'audio/mpeg'
});

const trace = langfuse.trace({
  name: 'speech-to-text',
  input: {
    audio: audio,
    language: 'en'
  }
});

const generation = trace.generation({
  name: 'whisper-transcription',
  model: 'whisper-1',
  input: audio
});

generation.end({
  output: { text: 'Transcribed text...' }
});

await langfuse.flushAsync();
```

### Multiple Media Items

Handle traces with multiple media items.

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';

const langfuse = new Langfuse();

const image1 = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,...'
});

const image2 = new LangfuseMedia({
  base64DataUri: 'data:image/jpeg;base64,...'
});

const trace = langfuse.trace({
  name: 'multi-image-comparison',
  input: {
    images: [image1, image2],
    task: 'Compare these images'
  }
});

const generation = trace.generation({
  name: 'image-comparison',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        { type: 'text', text: 'What are the differences?' },
        { type: 'image_url', image_url: { url: image1.toJSON() || '' } },
        { type: 'image_url', image_url: { url: image2.toJSON() || '' } }
      ]
    }
  ]
});

generation.end({
  output: { differences: ['...'] }
});

await langfuse.flushAsync();
```

### Retrieving Media for Analysis

Fetch traces and resolve media for local analysis.

```typescript
import { Langfuse } from 'langfuse';
import fs from 'fs';

const langfuse = new Langfuse();

// Fetch traces with media
const traces = await langfuse.fetchTraces({
  name: 'vision-analysis',
  limit: 10
});

for (const trace of traces.data) {
  // Resolve media references to base64
  const resolved = await langfuse.resolveMediaReferences({
    obj: trace,
    resolveWith: 'base64DataUri'
  });

  // Extract image from base64 data URI
  if (resolved.input?.image) {
    const base64Data = resolved.input.image.split(',')[1];
    const buffer = Buffer.from(base64Data, 'base64');

    // Save to file for analysis
    fs.writeFileSync(`./trace-${trace.id}.png`, buffer);
  }
}
```

## Content Type Support

Langfuse supports 50+ MIME types including:

**Images:**
- `image/jpeg`, `image/png`, `image/gif`, `image/webp`, `image/svg+xml`, `image/bmp`, `image/tiff`, `image/heic`, `image/heif`, `image/avif`

**Documents:**
- `application/pdf`, `text/plain`, `text/html`, `text/csv`, `text/markdown`

**Audio:**
- `audio/mpeg`, `audio/wav`, `audio/ogg`, `audio/mp4`, `audio/webm`, `audio/flac`, `audio/aac`

**Video:**
- `video/mp4`, `video/webm`, `video/ogg`, `video/mpeg`, `video/quicktime`, `video/x-msvideo`

**Archives:**
- `application/zip`, `application/gzip`, `application/x-tar`, `application/x-7z-compressed`

**Data:**
- `application/json`, `application/xml`, `application/octet-stream`

## Complete Media Handling Example

```typescript
import { Langfuse, LangfuseMedia } from 'langfuse';
import OpenAI from 'openai';
import fs from 'fs';

const langfuse = new Langfuse();
const openai = new OpenAI();

// Step 1: Create media objects
const userImage = new LangfuseMedia({
  filePath: './uploads/user-photo.jpg',
  contentType: 'image/jpeg'
});

const referenceImage = new LangfuseMedia({
  base64DataUri: 'data:image/png;base64,iVBORw0KGg...'
});

// Step 2: Create trace with media
const trace = langfuse.trace({
  name: 'image-comparison-workflow',
  userId: 'user-123',
  input: {
    user_image: userImage,
    reference_image: referenceImage,
    task: 'Compare image similarity'
  }
});

// Step 3: Use with OpenAI Vision
const generation = trace.generation({
  name: 'vision-comparison',
  model: 'gpt-4-vision-preview',
  input: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Compare these two images and describe their similarities and differences'
        },
        {
          type: 'image_url',
          image_url: {
            url: userImage.toJSON() || '',
            detail: 'high'
          }
        },
        {
          type: 'image_url',
          image_url: {
            url: referenceImage.toJSON() || '',
            detail: 'high'
          }
        }
      ]
    }
  ]
});

// Make actual API call
const response = await openai.chat.completions.create({
  model: 'gpt-4-vision-preview',
  messages: [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Compare these images'
        },
        {
          type: 'image_url',
          image_url: { url: userImage.toJSON() || '' }
        },
        {
          type: 'image_url',
          image_url: { url: referenceImage.toJSON() || '' }
        }
      ]
    }
  ],
  max_tokens: 500
});

generation.end({
  output: response.choices[0].message,
  usage: {
    input: response.usage?.prompt_tokens,
    output: response.usage?.completion_tokens,
    total: response.usage?.total_tokens
  }
});

trace.update({
  output: {
    comparison: response.choices[0].message.content
  }
});

// Step 4: Flush to upload media
await langfuse.flushAsync();

// Step 5: Later, retrieve and resolve media
const fetchedTrace = await langfuse.fetchTrace(trace.id);

const resolvedTrace = await langfuse.resolveMediaReferences({
  obj: fetchedTrace.data,
  resolveWith: 'base64DataUri'
});

// Save resolved images
if (resolvedTrace.input?.user_image) {
  const base64 = resolvedTrace.input.user_image.split(',')[1];
  fs.writeFileSync('./resolved-user-image.jpg', Buffer.from(base64, 'base64'));
}

console.log('Trace URL:', trace.getTraceUrl());
```