0
# GridFS File Storage
1
2
GridFS is MongoDB's large-file storage and retrieval system. It provides streaming upload and download, file metadata management, and efficient handling of files that exceed the 16 MB BSON document size limit by splitting them into chunks.
3
4
## Capabilities
5
6
### GridFSBuckets Factory
7
8
Factory class for creating GridFS bucket instances, using either the default bucket name (`fs`) or a custom bucket name.
9
10
```java { .api }
11
/**
12
* Factory for creating GridFS buckets
13
*/
14
public final class GridFSBuckets {
15
/**
16
* Creates a GridFS bucket with default name 'fs'
17
* @param database the database to create the bucket in
18
* @return GridFSBucket instance with default configuration
19
*/
20
public static GridFSBucket create(MongoDatabase database);
21
22
/**
23
* Creates a GridFS bucket with custom name
24
* @param database the database to create the bucket in
25
* @param bucketName the name of the GridFS bucket
26
* @return GridFSBucket instance with specified bucket name
27
*/
28
public static GridFSBucket create(MongoDatabase database, String bucketName);
29
}
30
```
31
32
**Usage Examples:**
33
34
```java
35
import com.mongodb.client.gridfs.GridFSBucket;
36
import com.mongodb.client.gridfs.GridFSBuckets;
37
import com.mongodb.client.MongoDatabase;
38
39
// Create default GridFS bucket (uses 'fs.files' and 'fs.chunks' collections)
40
MongoDatabase database = client.getDatabase("myapp");
41
GridFSBucket gridFSBucket = GridFSBuckets.create(database);
42
43
// Create custom named bucket (uses 'photos.files' and 'photos.chunks')
44
GridFSBucket photoBucket = GridFSBuckets.create(database, "photos");
45
GridFSBucket documentBucket = GridFSBuckets.create(database, "documents");
46
```
47
48
### GridFSBucket Interface
49
50
Primary interface for GridFS operations including file upload, download, and management.
51
52
```java { .api }
53
/**
54
* Interface for GridFS bucket operations
55
*/
56
public interface GridFSBucket {
57
/**
58
* Gets the name of this GridFS bucket
59
* @return the bucket name
60
*/
61
String getBucketName();
62
63
/**
64
* Gets the default chunk size in bytes
65
* @return chunk size in bytes
66
*/
67
int getChunkSizeBytes();
68
69
/**
70
* Opens an upload stream for a new file
71
* @param filename the name of the file
72
* @return GridFSUploadStream for writing file data
73
*/
74
GridFSUploadStream openUploadStream(String filename);
75
76
/**
77
* Opens an upload stream with specific file ID
78
* @param id the file ID as BsonValue
79
* @param filename the name of the file
80
* @return GridFSUploadStream for writing file data
81
*/
82
GridFSUploadStream openUploadStream(BsonValue id, String filename);
83
84
/**
85
* Opens an upload stream with upload options
86
* @param filename the name of the file
87
* @param options upload configuration options
88
* @return GridFSUploadStream for writing file data
89
*/
90
GridFSUploadStream openUploadStream(String filename, GridFSUploadOptions options);
91
92
/**
93
* Uploads a file from an InputStream
94
* @param filename the name of the file
95
* @param source the InputStream to read from
96
* @return ObjectId of the uploaded file
97
*/
98
ObjectId uploadFromStream(String filename, InputStream source);
99
100
/**
101
* Uploads a file with specific ID from an InputStream
102
* @param id the file ID as BsonValue
103
* @param filename the name of the file
104
* @param source the InputStream to read from
105
*/
106
void uploadFromStream(BsonValue id, String filename, InputStream source);
107
108
/**
109
* Uploads a file with options from an InputStream
110
* @param filename the name of the file
111
* @param source the InputStream to read from
112
* @param options upload configuration options
113
* @return ObjectId of the uploaded file
114
*/
115
ObjectId uploadFromStream(String filename, InputStream source, GridFSUploadOptions options);
116
117
/**
118
* Opens a download stream by file ID
119
* @param id the file ID as ObjectId
120
* @return GridFSDownloadStream for reading file data
121
*/
122
GridFSDownloadStream openDownloadStream(ObjectId id);
123
124
/**
125
* Opens a download stream by file ID as BsonValue
126
* @param id the file ID as BsonValue
127
* @return GridFSDownloadStream for reading file data
128
*/
129
GridFSDownloadStream openDownloadStream(BsonValue id);
130
131
/**
132
* Opens a download stream by filename (latest version)
133
* @param filename the name of the file
134
* @return GridFSDownloadStream for reading file data
135
*/
136
GridFSDownloadStream openDownloadStream(String filename);
137
138
/**
139
* Opens a download stream by filename with options
140
* @param filename the name of the file
141
* @param options download configuration options
142
* @return GridFSDownloadStream for reading file data
143
*/
144
GridFSDownloadStream openDownloadStream(String filename, GridFSDownloadOptions options);
145
146
/**
147
* Downloads a file to an OutputStream by ID
148
* @param id the file ID as ObjectId
149
* @param destination the OutputStream to write to
150
*/
151
void downloadToStream(ObjectId id, OutputStream destination);
152
153
/**
154
* Downloads a file to an OutputStream by ID as BsonValue
155
* @param id the file ID as BsonValue
156
* @param destination the OutputStream to write to
157
*/
158
void downloadToStream(BsonValue id, OutputStream destination);
159
160
/**
161
* Downloads a file to an OutputStream by filename
162
* @param filename the name of the file
163
* @param destination the OutputStream to write to
164
*/
165
void downloadToStream(String filename, OutputStream destination);
166
167
/**
168
* Deletes a file by ID
169
* @param id the file ID as ObjectId
170
*/
171
void delete(ObjectId id);
172
173
/**
174
* Deletes a file by ID as BsonValue
175
* @param id the file ID as BsonValue
176
*/
177
void delete(BsonValue id);
178
179
/**
180
* Renames a file by ID
181
* @param id the file ID as ObjectId
182
* @param newFilename the new filename
183
*/
184
void rename(ObjectId id, String newFilename);
185
186
/**
187
* Renames a file by ID as BsonValue
188
* @param id the file ID as BsonValue
189
* @param newFilename the new filename
190
*/
191
void rename(BsonValue id, String newFilename);
192
193
/**
194
* Finds files in the bucket
195
* @return GridFSFindIterable for querying files
196
*/
197
GridFSFindIterable find();
198
199
/**
200
* Finds files matching a filter
201
* @param filter the query filter as Bson
202
* @return GridFSFindIterable for querying files
203
*/
204
GridFSFindIterable find(Bson filter);
205
206
/**
207
* Drops the entire GridFS bucket (files and chunks collections)
208
*/
209
void drop();
210
}
211
```
212
213
**Usage Examples:**
214
215
```java
216
import com.mongodb.client.gridfs.model.GridFSUploadOptions;
217
import java.io.FileInputStream;
218
import java.io.FileOutputStream;
219
import java.io.ByteArrayInputStream;
220
221
// Upload file from InputStream
222
try (FileInputStream fileInput = new FileInputStream("large-document.pdf")) {
223
ObjectId fileId = gridFSBucket.uploadFromStream("large-document.pdf", fileInput);
224
System.out.println("File uploaded with ID: " + fileId);
225
}
226
227
// Upload with metadata and custom chunk size
228
Document metadata = new Document("contentType", "application/pdf")
229
.append("department", "legal")
230
.append("confidential", true);
231
232
GridFSUploadOptions uploadOptions = new GridFSUploadOptions()
233
.chunkSizeBytes(1024 * 1024) // 1MB chunks
234
.metadata(metadata);
235
236
try (FileInputStream fileInput = new FileInputStream("contract.pdf")) {
237
ObjectId fileId = gridFSBucket.uploadFromStream("contract.pdf", fileInput, uploadOptions);
238
System.out.println("Contract uploaded with ID: " + fileId);
239
}
240
241
// Download file to OutputStream
242
try (FileOutputStream fileOutput = new FileOutputStream("downloaded-contract.pdf")) {
243
gridFSBucket.downloadToStream(fileId, fileOutput);
244
System.out.println("File downloaded successfully");
245
}
246
247
// Stream-based upload for large files
248
GridFSUploadStream uploadStream = gridFSBucket.openUploadStream("streaming-upload.dat");
249
try {
250
// Write data in chunks
251
byte[] buffer = new byte[8192];
252
int bytesRead;
253
while ((bytesRead = sourceInputStream.read(buffer)) != -1) {
254
uploadStream.write(buffer, 0, bytesRead);
255
}
256
} finally {
257
uploadStream.close();
258
}
259
ObjectId streamFileId = uploadStream.getObjectId();
260
```
261
262
### GridFSUploadStream
263
264
OutputStream implementation for streaming file uploads to GridFS.
265
266
```java { .api }
267
/**
268
* OutputStream for uploading files to GridFS
269
*/
270
public abstract class GridFSUploadStream extends OutputStream {
271
/**
272
* Gets the ObjectId of the file being uploaded
273
* @return ObjectId of the file
274
*/
275
public abstract ObjectId getObjectId();
276
277
/**
278
* Gets the BsonValue ID of the file being uploaded
279
* @return BsonValue ID of the file
280
*/
281
public abstract BsonValue getId();
282
283
/**
284
* Writes a byte to the stream
285
* @param b the byte to write
286
*/
287
@Override
288
public abstract void write(int b) throws IOException;
289
290
/**
291
* Writes a byte array to the stream
292
* @param b the byte array to write
293
* @param off the start offset
294
* @param len the number of bytes to write
295
*/
296
@Override
297
public abstract void write(byte[] b, int off, int len) throws IOException;
298
299
/**
300
* Flushes the stream
301
*/
302
@Override
303
public abstract void flush() throws IOException;
304
305
/**
306
* Closes the stream and completes the upload
307
*/
308
@Override
309
public abstract void close() throws IOException;
310
}
311
```
312
313
**Usage Examples:**
314
315
```java
316
// Progressive upload with progress tracking
317
GridFSUploadStream uploadStream = gridFSBucket.openUploadStream("progress-upload.bin");
318
try {
319
long totalBytes = sourceFile.length();
320
long uploadedBytes = 0;
321
322
byte[] buffer = new byte[64 * 1024]; // 64KB buffer
323
int bytesRead;
324
325
while ((bytesRead = sourceInputStream.read(buffer)) != -1) {
326
uploadStream.write(buffer, 0, bytesRead);
327
uploadedBytes += bytesRead;
328
329
// Report progress
330
double progress = (double) uploadedBytes / totalBytes * 100;
331
System.out.printf("Upload progress: %.1f%%\n", progress);
332
}
333
} finally {
334
uploadStream.close();
335
}
336
337
System.out.println("Upload completed. File ID: " + uploadStream.getObjectId());
338
```
339
340
### GridFSDownloadStream
341
342
InputStream implementation for streaming file downloads from GridFS.
343
344
```java { .api }
345
/**
346
* InputStream for downloading files from GridFS
347
*/
348
public abstract class GridFSDownloadStream extends InputStream {
349
/**
350
* Gets the GridFS file information
351
* @return GridFSFile containing file metadata
352
*/
353
public abstract GridFSFile getGridFSFile();
354
355
/**
356
* Reads a byte from the stream
357
* @return the byte read, or -1 if end of stream
358
*/
359
@Override
360
public abstract int read() throws IOException;
361
362
/**
363
* Reads bytes into a buffer
364
* @param b the buffer to read into
365
* @param off the start offset
366
* @param len the maximum number of bytes to read
367
* @return number of bytes read, or -1 if end of stream
368
*/
369
@Override
370
public abstract int read(byte[] b, int off, int len) throws IOException;
371
372
/**
373
* Skips bytes in the stream
374
* @param n the number of bytes to skip
375
* @return actual number of bytes skipped
376
*/
377
@Override
378
public abstract long skip(long n) throws IOException;
379
380
/**
381
* Returns available bytes to read
382
* @return number of available bytes
383
*/
384
@Override
385
public abstract int available() throws IOException;
386
387
/**
388
* Closes the stream
389
*/
390
@Override
391
public abstract void close() throws IOException;
392
}
393
```
394
395
**Usage Examples:**
396
397
```java
398
// Stream download with file information
399
try (GridFSDownloadStream downloadStream = gridFSBucket.openDownloadStream(fileId)) {
400
GridFSFile fileInfo = downloadStream.getGridFSFile();
401
402
System.out.println("Filename: " + fileInfo.getFilename());
403
System.out.println("File size: " + fileInfo.getLength() + " bytes");
404
System.out.println("Upload date: " + fileInfo.getUploadDate());
405
System.out.println("Content type: " + fileInfo.getMetadata().getString("contentType"));
406
407
// Read file in chunks
408
byte[] buffer = new byte[8192];
409
int bytesRead;
410
long totalRead = 0;
411
412
while ((bytesRead = downloadStream.read(buffer)) != -1) {
413
// Process chunk
414
processFileChunk(buffer, bytesRead);
415
totalRead += bytesRead;
416
417
// Progress reporting
418
double progress = (double) totalRead / fileInfo.getLength() * 100;
419
System.out.printf("Download progress: %.1f%%\n", progress);
420
}
421
}
422
```
423
424
### GridFSFindIterable
425
426
Query interface for finding and filtering GridFS files.
427
428
```java { .api }
429
/**
430
* Interface for querying GridFS files
431
*/
432
public interface GridFSFindIterable extends MongoIterable<GridFSFile> {
433
/**
434
* Sets the query filter to limit results
435
* @param filter the query filter as Bson
436
* @return GridFSFindIterable with applied filter
437
*/
438
GridFSFindIterable filter(Bson filter);
439
440
/**
441
* Sets the maximum number of files to return
442
* @param limit the maximum number of files
443
* @return GridFSFindIterable with applied limit
444
*/
445
GridFSFindIterable limit(int limit);
446
447
/**
448
* Sets the number of files to skip
449
* @param skip the number of files to skip
450
* @return GridFSFindIterable with applied skip
451
*/
452
GridFSFindIterable skip(int skip);
453
454
/**
455
* Sets the sort criteria for results
456
* @param sort the sort specification as Bson
457
* @return GridFSFindIterable with applied sort
458
*/
459
GridFSFindIterable sort(Bson sort);
460
461
/**
462
* Sets whether to disable the server-side idle timeout for the cursor
463
* @param noCursorTimeout true to disable cursor timeout
464
* @return GridFSFindIterable with cursor timeout setting
465
*/
466
GridFSFindIterable noCursorTimeout(boolean noCursorTimeout);
467
468
/**
469
* Sets the maximum execution time
470
* @param maxTime the maximum time
471
* @param timeUnit the time unit
472
* @return GridFSFindIterable with time limit
473
*/
474
GridFSFindIterable maxTime(long maxTime, TimeUnit timeUnit);
475
476
/**
477
* Sets the batch size for cursor operations
478
* @param batchSize the batch size
479
* @return GridFSFindIterable with specified batch size
480
*/
481
GridFSFindIterable batchSize(int batchSize);
482
483
/**
484
* Sets collation for string comparisons
485
* @param collation the collation specification
486
* @return GridFSFindIterable with applied collation
487
*/
488
GridFSFindIterable collation(Collation collation);
489
}
490
```
491
492
**Usage Examples:**
493
494
```java
495
import com.mongodb.client.model.Filters;
496
import com.mongodb.client.model.Sorts;
497
498
// Find all PDF files
499
GridFSFindIterable pdfFiles = gridFSBucket.find()
500
.filter(Filters.eq("metadata.contentType", "application/pdf"))
501
.sort(Sorts.descending("uploadDate"));
502
503
for (GridFSFile file : pdfFiles) {
504
System.out.println("PDF File: " + file.getFilename() +
505
" (Size: " + file.getLength() + " bytes)");
506
}
507
508
// Find large files uploaded recently
509
Date oneWeekAgo = Date.from(Instant.now().minus(7, ChronoUnit.DAYS));
510
GridFSFindIterable recentLargeFiles = gridFSBucket.find()
511
.filter(Filters.and(
512
Filters.gte("uploadDate", oneWeekAgo),
513
Filters.gte("length", 10 * 1024 * 1024) // 10MB+
514
))
515
.sort(Sorts.descending("length"))
516
.limit(10);
517
518
// Find files by custom metadata
519
GridFSFindIterable departmentFiles = gridFSBucket.find()
520
.filter(Filters.and(
521
Filters.eq("metadata.department", "marketing"),
522
Filters.eq("metadata.confidential", false)
523
))
524
.sort(Sorts.ascending("filename"));
525
526
// File cleanup - find old temporary files
527
Date thirtyDaysAgo = Date.from(Instant.now().minus(30, ChronoUnit.DAYS));
528
GridFSFindIterable tempFiles = gridFSBucket.find()
529
.filter(Filters.and(
530
Filters.regex("filename", "^temp_"),
531
Filters.lt("uploadDate", thirtyDaysAgo)
532
));
533
534
for (GridFSFile tempFile : tempFiles) {
535
System.out.println("Deleting old temp file: " + tempFile.getFilename());
536
gridFSBucket.delete(tempFile.getObjectId());
537
}
538
```
539
540
### Configuration and Performance
541
542
GridFS bucket configuration and performance optimization techniques.
543
544
```java { .api }
545
/**
546
* Configuration methods for GridFS buckets
547
*/
548
// Create bucket with custom codec registry
549
CodecRegistry codecRegistry = CodecRegistries.fromRegistries(
550
MongoClientSettings.getDefaultCodecRegistry(),
551
CodecRegistries.fromProviders(PojoCodecProvider.builder().automatic(true).build())
552
);
553
554
GridFSBucket configuredBucket = gridFSBucket
555
.withCodecRegistry(codecRegistry)
556
.withReadPreference(ReadPreference.secondaryPreferred())
557
.withWriteConcern(WriteConcern.MAJORITY)
558
.withReadConcern(ReadConcern.MAJORITY);
559
560
// Optimize chunk size for different file types
561
GridFSUploadOptions smallFiles = new GridFSUploadOptions()
562
.chunkSizeBytes(64 * 1024); // 64KB for small files
563
564
GridFSUploadOptions largeFiles = new GridFSUploadOptions()
565
.chunkSizeBytes(2 * 1024 * 1024); // 2MB for large files
566
567
// Index optimization for file queries
568
MongoCollection<Document> filesCollection = database.getCollection(bucketName + ".files");
569
570
// Index for filename queries
571
filesCollection.createIndex(Indexes.ascending("filename"));
572
573
// Index for metadata queries
574
filesCollection.createIndex(Indexes.ascending("metadata.contentType"));
575
filesCollection.createIndex(Indexes.ascending("metadata.department"));
576
577
// Compound index for common queries
578
filesCollection.createIndex(Indexes.compound(
579
Indexes.ascending("metadata.department"),
580
Indexes.descending("uploadDate")
581
));
582
```