or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-queries.md · bson-handling.md · bulk-transactions.md · client-connection.md · database-collection.md · gridfs-storage.md · index.md · monitoring-events.md

docs/gridfs-storage.md

0

# GridFS File Storage

1

2

This page documents PyMongo's `gridfs` package, which stores and retrieves large files in MongoDB, including streaming uploads and downloads and per-file metadata management.

3

4

## Capabilities

5

6

### GridFS Interface

7

8

Legacy GridFS interface for file storage operations. New code should prefer the `GridFSBucket` interface described in the next section.

9

10

```python { .api }

11

class GridFS:

12

def __init__(self, database, collection="fs", disable_md5=False):

13

"""

14

GridFS instance for file operations.

15

16

Parameters:

17

- database: Database instance

18

- collection: GridFS collection prefix (default "fs")

19

- disable_md5: disable MD5 checksum calculation

20

"""

21

22

def new_file(self, **kwargs):

23

"""

24

Create new GridFS file for writing.

25

26

Parameters:

27

- _id: file identifier

28

- filename: file name

29

- contentType: MIME content type

30

- chunkSize: chunk size in bytes

31

- metadata: custom metadata dictionary

32

33

Returns:

34

GridIn: File handle for writing

35

"""

36

37

def put(self, data, **kwargs):

38

"""

39

Store data as GridFS file.

40

41

Parameters:

42

- data: file data (bytes or file-like object)

43

- kwargs: same as new_file()

44

45

Returns:

46

ObjectId: File identifier

47

"""

48

49

def get(self, file_id, session=None):

50

"""

51

Retrieve file by ID.

52

53

Parameters:

54

- file_id: file identifier

55

- session: optional ClientSession

56

57

Returns:

58

GridOut: File handle for reading

59

60

Raises:

61

NoFile: if file not found

62

"""

63

64

def get_version(self, filename=None, version=-1, session=None, **kwargs):

65

"""

66

Retrieve file by filename and version.

67

68

Parameters:

69

- filename: file name

70

- version: version number (-1 for latest)

71

- session: optional ClientSession

72

73

Returns:

74

GridOut: File handle for reading

75

76

Raises:

77

NoFile: if file not found

78

"""

79

80

def get_last_version(self, filename=None, session=None, **kwargs):

81

"""

82

Retrieve latest version of file by filename.

83

84

Parameters:

85

- filename: file name

86

- session: optional ClientSession

87

88

Returns:

89

GridOut: File handle for reading

90

91

Raises:

92

NoFile: if file not found

93

"""

94

95

def delete(self, file_id, session=None):

96

"""

97

Delete file by ID.

98

99

Parameters:

100

- file_id: file identifier

101

- session: optional ClientSession

102

103

Raises:

104

NoFile: if file not found

105

"""

106

107

def list(self, session=None):

108

"""

109

List stored filenames.

110

111

Parameters:

112

- session: optional ClientSession

113

114

Returns:

115

list: List of filenames

116

"""

117

118

def find_one(self, filter=None, session=None, *args, **kwargs):

119

"""

120

Find single file by filter.

121

122

Parameters:

123

- filter: query criteria

124

- session: optional ClientSession

125

126

Returns:

127

GridOut: File handle or None

128

"""

129

130

def find(self, *args, **kwargs):

131

"""

132

Find files matching criteria.

133

134

Parameters:

135

- filter: query criteria

136

- skip: number of files to skip

137

- limit: maximum number of files

138

- sort: sort specification

139

- session: optional ClientSession

140

141

Returns:

142

GridOutCursor: Cursor for files

143

"""

144

145

def exists(self, document_or_id=None, session=None, **kwargs):

146

"""

147

Check if file exists.

148

149

Parameters:

150

- document_or_id: file ID or query document

151

- session: optional ClientSession

152

153

Returns:

154

bool: True if file exists

155

"""

156

```

157

158

### GridFSBucket Interface

159

160

Modern GridFS interface with streaming support (recommended).

161

162

```python { .api }

163

class GridFSBucket:

164

def __init__(

165

self,

166

db,

167

bucket_name="fs",

168

chunk_size_bytes=DEFAULT_CHUNK_SIZE,

169

write_concern=None,

170

read_preference=None,

171

disable_md5=False

172

):

173

"""

174

GridFS bucket for file operations.

175

176

Parameters:

177

- db: Database instance

178

- bucket_name: bucket name (default "fs")

179

- chunk_size_bytes: default chunk size

180

- write_concern: write concern for operations

181

- read_preference: read preference for operations

182

- disable_md5: disable MD5 checksum calculation

183

"""

184

185

def open_upload_stream(

186

self,

187

filename,

188

chunk_size_bytes=None,

189

metadata=None,

190

session=None

191

):

192

"""

193

Open upload stream for writing file.

194

195

Parameters:

196

- filename: file name

197

- chunk_size_bytes: chunk size override

198

- metadata: custom metadata dictionary

199

- session: optional ClientSession

200

201

Returns:

202

GridIn: Upload stream

203

"""

204

205

def open_upload_stream_with_id(

206

self,

207

file_id,

208

filename,

209

chunk_size_bytes=None,

210

metadata=None,

211

session=None

212

):

213

"""

214

Open upload stream with specific file ID.

215

216

Parameters:

217

- file_id: file identifier

218

- filename: file name

219

- chunk_size_bytes: chunk size override

220

- metadata: custom metadata dictionary

221

- session: optional ClientSession

222

223

Returns:

224

GridIn: Upload stream

225

"""

226

227

def upload_from_stream(

228

self,

229

filename,

230

source,

231

chunk_size_bytes=None,

232

metadata=None,

233

session=None

234

):

235

"""

236

Upload file from stream.

237

238

Parameters:

239

- filename: file name

240

- source: readable file-like object

241

- chunk_size_bytes: chunk size override

242

- metadata: custom metadata dictionary

243

- session: optional ClientSession

244

245

Returns:

246

ObjectId: File identifier

247

"""

248

249

def upload_from_stream_with_id(

250

self,

251

file_id,

252

filename,

253

source,

254

chunk_size_bytes=None,

255

metadata=None,

256

session=None

257

):

258

"""

259

Upload file from stream with specific ID.

260

261

Parameters:

262

- file_id: file identifier

263

- filename: file name

264

- source: readable file-like object

265

- chunk_size_bytes: chunk size override

266

- metadata: custom metadata dictionary

267

- session: optional ClientSession

268

"""

269

270

def open_download_stream(self, file_id, session=None):

271

"""

272

Open download stream by file ID.

273

274

Parameters:

275

- file_id: file identifier

276

- session: optional ClientSession

277

278

Returns:

279

GridOut: Download stream

280

281

Raises:

282

NoFile: if file not found

283

"""

284

285

def download_to_stream(self, file_id, destination, session=None):

286

"""

287

Download file to stream by ID.

288

289

Parameters:

290

- file_id: file identifier

291

- destination: writable file-like object

292

- session: optional ClientSession

293

294

Raises:

295

NoFile: if file not found

296

"""

297

298

def delete(self, file_id, session=None):

299

"""

300

Delete file by ID.

301

302

Parameters:

303

- file_id: file identifier

304

- session: optional ClientSession

305

306

Raises:

307

NoFile: if file not found

308

"""

309

310

def find(self, filter=None, session=None, **kwargs):

311

"""

312

Find files matching criteria.

313

314

Parameters:

315

- filter: query criteria for files collection

316

- batch_size: cursor batch size

317

- limit: maximum number of files

318

- skip: number of files to skip

319

- sort: sort specification

320

- session: optional ClientSession

321

322

Returns:

323

GridOutCursor: Cursor for files

324

"""

325

326

def open_download_stream_by_name(

327

self,

328

filename,

329

revision=-1,

330

session=None

331

):

332

"""

333

Open download stream by filename.

334

335

Parameters:

336

- filename: file name

337

- revision: file revision (-1 for latest)

338

- session: optional ClientSession

339

340

Returns:

341

GridOut: Download stream

342

343

Raises:

344

NoFile: if file not found

345

"""

346

347

def download_to_stream_by_name(

348

self,

349

filename,

350

destination,

351

revision=-1,

352

session=None

353

):

354

"""

355

Download file to stream by name.

356

357

Parameters:

358

- filename: file name

359

- destination: writable file-like object

360

- revision: file revision (-1 for latest)

361

- session: optional ClientSession

362

363

Raises:

364

NoFile: if file not found

365

"""

366

367

def rename(self, file_id, new_filename, session=None):

368

"""

369

Rename file.

370

371

Parameters:

372

- file_id: file identifier

373

- new_filename: new file name

374

- session: optional ClientSession

375

376

Raises:

377

NoFile: if file not found

378

"""

379

```

380

381

### GridFS File Objects

382

383

File objects for reading and writing GridFS files.

384

385

```python { .api }

386

class GridIn:

387

def __init__(self, root_collection, session=None, disable_md5=False, **kwargs):

388

"""

389

GridFS file for writing.

390

391

Parameters:

392

- root_collection: GridFS root collection

393

- session: optional ClientSession

394

- disable_md5: disable MD5 calculation

395

- kwargs: file metadata

396

"""

397

398

def write(self, data):

399

"""

400

Write data to file.

401

402

Parameters:

403

- data: bytes to write

404

"""

405

406

def writelines(self, lines):

407

"""

408

Write sequence of bytes.

409

410

Parameters:

411

- lines: sequence of bytes

412

"""

413

414

def close(self):

415

"""Close file and finalize upload."""

416

417

def abort(self):

418

"""Abort upload and delete partial file."""

419

420

@property

421

def closed(self):

422

"""

423

Check if file is closed.

424

425

Returns:

426

bool: True if closed

427

"""

428

429

@property

430

def _id(self):

431

"""

432

File identifier.

433

434

Returns:

435

ObjectId: File ID

436

"""

437

438

@property

439

def filename(self):

440

"""

441

File name.

442

443

Returns:

444

str: File name

445

"""

446

447

@property

448

def length(self):

449

"""

450

File size in bytes.

451

452

Returns:

453

int: File size

454

"""

455

456

@property

457

def chunk_size(self):

458

"""

459

Chunk size in bytes.

460

461

Returns:

462

int: Chunk size

463

"""

464

465

@property

466

def upload_date(self):

467

"""

468

Upload completion timestamp.

469

470

Returns:

471

datetime: Upload date

472

"""

473

474

@property

475

def md5(self):

476

"""

477

MD5 checksum (if enabled).

478

479

Returns:

480

str: MD5 hash or None

481

"""

482

483

@property

484

def metadata(self):

485

"""

486

Custom metadata.

487

488

Returns:

489

dict: Metadata dictionary

490

"""

491

492

class GridOut:

493

def __init__(self, root_collection, file_id=None, file_document=None, session=None):

494

"""

495

GridFS file for reading.

496

497

Parameters:

498

- root_collection: GridFS root collection

499

- file_id: file identifier

500

- file_document: file document

501

- session: optional ClientSession

502

"""

503

504

def read(self, size=-1):

505

"""

506

Read data from file.

507

508

Parameters:

509

- size: bytes to read (-1 for all)

510

511

Returns:

512

bytes: File data

513

"""

514

515

def readline(self, size=-1):

516

"""

517

Read line from file.

518

519

Parameters:

520

- size: maximum bytes to read

521

522

Returns:

523

bytes: Line data

524

"""

525

526

def readlines(self):

527

"""

528

Read all lines from file.

529

530

Returns:

531

list: List of lines as bytes

532

"""

533

534

def seek(self, pos, whence=0):

535

"""

536

Seek to file position.

537

538

Parameters:

539

- pos: position

540

- whence: seek mode (0=absolute, 1=relative, 2=from end)

541

"""

542

543

def tell(self):

544

"""

545

Get current file position.

546

547

Returns:

548

int: Current position

549

"""

550

551

def close(self):

552

"""Close file."""

553

554

def __iter__(self):

555

"""Iterate over file lines."""

556

557

def __enter__(self):

558

"""Context manager entry."""

559

560

def __exit__(self, exc_type, exc_val, exc_tb):

561

"""Context manager exit."""

562

563

# Same properties as GridIn

564

@property

565

def _id(self): ...

566

@property

567

def filename(self): ...

568

@property

569

def length(self): ...

570

@property

571

def chunk_size(self): ...

572

@property

573

def upload_date(self): ...

574

@property

575

def md5(self): ...

576

@property

577

def metadata(self): ...

578

579

class GridOutCursor:

580

def __init__(self, collection, filter=None, session=None, **kwargs):

581

"""

582

Cursor for GridFS files.

583

584

Parameters:

585

- collection: files collection

586

- filter: query criteria

587

- session: optional ClientSession

588

- kwargs: cursor options

589

"""

590

591

def __iter__(self):

592

"""Iterate over files."""

593

594

def __next__(self):

595

"""Get next file."""

596

597

def next(self):

598

"""Get next file (Python 2 compatibility)."""

599

600

def clone(self):

601

"""Clone cursor."""

602

603

def count(self):

604

"""

605

Count matching files.

606

607

Returns:

608

int: File count

609

"""

610

```

611

612

### Constants and Exceptions

613

614

GridFS-related constants and error handling.

615

616

```python { .api }

617

DEFAULT_CHUNK_SIZE: int # Default chunk size in bytes (255 KiB = 255 * 1024)

618

619

class NoFile(Exception):

620

"""Raised when GridFS file is not found."""

621

```

622

623

## Usage Examples

624

625

### Basic GridFS Operations

626

627

```python

628

from pymongo import MongoClient

629

import gridfs

630

from io import BytesIO

631

632

client = MongoClient()

633

db = client.mydb

634

fs = gridfs.GridFS(db)

635

636

# Store a file

637

with open("image.jpg", "rb") as f:

638

file_id = fs.put(f, filename="profile.jpg", contentType="image/jpeg")

639

print(f"Stored file with ID: {file_id}")

640

641

# Retrieve a file

642

grid_out = fs.get(file_id)

643

with open("downloaded.jpg", "wb") as f:

644

f.write(grid_out.read())

645

646

print(f"Downloaded {grid_out.filename}, size: {grid_out.length} bytes")

647

648

# Store with metadata

649

file_id = fs.put(

650

b"Hello, GridFS!",

651

filename="greeting.txt",

652

contentType="text/plain",

653

metadata={"author": "Alice", "tags": ["greeting", "sample"]}

654

)

655

656

# Find and list files

657

for grid_file in fs.find({"metadata.author": "Alice"}):

658

print(f"File: {grid_file.filename}, Author: {grid_file.metadata['author']}")

659

660

# Delete a file

661

fs.delete(file_id)

662

```

663

664

### GridFSBucket Operations (Recommended)

665

666

```python

667

from pymongo import MongoClient

668

import gridfs

669

from io import BytesIO

670

671

client = MongoClient()

672

db = client.mydb

673

bucket = gridfs.GridFSBucket(db, bucket_name="images")

674

675

# Upload from stream

676

with open("photo.jpg", "rb") as f:

677

file_id = bucket.upload_from_stream(

678

"user_photo.jpg",

679

f,

680

metadata={"user_id": 12345, "category": "profile"}

681

)

682

683

print(f"Uploaded photo with ID: {file_id}")

684

685

# Download to stream

686

with open("downloaded_photo.jpg", "wb") as f:

687

bucket.download_to_stream(file_id, f)

688

689

# Upload with custom chunk size for large files

690

with open("video.mp4", "rb") as f:

691

file_id = bucket.upload_from_stream(

692

"presentation.mp4",

693

f,

694

chunk_size_bytes=1024*1024, # 1MB chunks

695

metadata={"duration": 1800, "resolution": "1080p"}

696

)

697

698

# Stream processing

699

upload_stream = bucket.open_upload_stream(

700

"processed_data.csv",

701

metadata={"processing_date": "2023-06-01"}

702

)

703

704

# Write data in chunks

705

for chunk in process_large_dataset():

706

upload_stream.write(chunk.encode())

707

708

upload_stream.close()

709

print(f"Processed file ID: {upload_stream._id}")

710

```

711

712

### Advanced GridFS Usage

713

714

```python

715

import gridfs

716

from bson import ObjectId

717

from datetime import datetime

718

719

# Custom GridFS collection (`db` is a pymongo Database, e.g. MongoClient().mydb as in the examples above)

720

fs = gridfs.GridFS(db, collection="documents")

721

722

# Store with specific file ID

723

custom_id = ObjectId()

724

fs.put(

725

b"Important document content",

726

_id=custom_id,

727

filename="contract.pdf",

728

contentType="application/pdf",

729

metadata={

730

"department": "legal",

731

"confidential": True,

732

"expires": datetime(2025, 12, 31)

733

}

734

)

735

736

# Find files with complex queries

737

large_images = fs.find({

738

"contentType": {"$regex": "^image/"},

739

"length": {"$gt": 1024*1024}, # > 1MB

740

"uploadDate": {"$gte": datetime(2023, 1, 1)}

741

}).sort("uploadDate", -1)

742

743

for img in large_images:

744

print(f"Large image: {img.filename}, {img.length/1024/1024:.1f}MB")

745

746

# Version management by filename

747

versions = list(fs.find({"filename": "document.txt"}).sort("uploadDate", 1))

748

print(f"Found {len(versions)} versions of document.txt")

749

750

# Get latest version

751

latest = fs.get_last_version("document.txt")

752

print(f"Latest version uploaded: {latest.upload_date}")

753

754

# Stream reading (`file_id` is the _id of a previously stored file, e.g. custom_id above)

755

grid_out = fs.get(file_id)

756

while True:

757

chunk = grid_out.read(8192) # Read 8KB chunks

758

if not chunk:

759

break

760

process_chunk(chunk)

761

grid_out.close()

762

```

763

764

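### GridFS with Transactions

> **Caution:** GridFS does not support multi-document transactions. Recent PyMongo releases raise `InvalidOperation` when a GridFS operation is given a session that has an active transaction, so verify the example below against the PyMongo version you target. Passing a `session` that is *not* in a transaction is supported.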

765

766

```python

767

import gridfs

768

from datetime import datetime, timedelta

from pymongo import MongoClient

from pymongo.errors import PyMongoError

769

770

client = MongoClient()

771

db = client.mydb

772

bucket = gridfs.GridFSBucket(db)

773

774

# GridFS operations in transaction

775

with client.start_session() as session:

776

with session.start_transaction():

777

try:

778

# Upload file

779

with open("data.json", "rb") as f:

780

file_id = bucket.upload_from_stream(

781

"backup.json",

782

f,

783

session=session

784

)

785

786

# Update metadata in related collection

787

db.backups.insert_one({

788

"file_id": file_id,

789

"created_date": datetime.now(),

790

"status": "completed"

791

}, session=session)

792

793

print("Backup created successfully")

794

795

except PyMongoError as e:

796

print(f"Backup failed: {e}")

797

raise # Will abort transaction

798

799

# Cleanup old backups

800

def cleanup_old_backups(session):

801

"""Remove backups older than 30 days."""

802

cutoff_date = datetime.now() - timedelta(days=30)

803

804

old_backups = db.backups.find(

805

{"created_date": {"$lt": cutoff_date}},

806

session=session

807

)

808

809

for backup in old_backups:

810

# Delete GridFS file

811

bucket.delete(backup["file_id"], session=session)

812

# Delete metadata

813

db.backups.delete_one({"_id": backup["_id"]}, session=session)

814

815

# Run cleanup in transaction

816

with client.start_session() as session:

817

session.with_transaction(cleanup_old_backups)

818

```