Tessl Tile for pypi/pymongo@3.13.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced-queries.md bson-handling.md bulk-transactions.md client-connection.md database-collection.md gridfs-storage.md index.md monitoring-events.md

gridfs-storage.mddocs/

0
# GridFS File Storage
1

2
GridFS support for storing and retrieving large files, including streaming operations and metadata management.
3

4
## Capabilities
5

6
### GridFS Interface
7

8
Legacy GridFS interface for file storage operations.
9

10
```python { .api }
11
class GridFS:
12
    def __init__(self, database, collection="fs", disable_md5=False):
13
        """
14
        GridFS instance for file operations.
15

16
        Parameters:
17
        - database: Database instance
18
        - collection: GridFS collection prefix (default "fs")
19
        - disable_md5: disable MD5 checksum calculation
20
        """
21

22
    def new_file(self, **kwargs):
23
        """
24
        Create new GridFS file for writing.
25

26
        Parameters:
27
        - _id: file identifier
28
        - filename: file name
29
        - contentType: MIME content type
30
        - chunkSize: chunk size in bytes
31
        - metadata: custom metadata dictionary
32

33
        Returns:
34
        GridIn: File handle for writing
35
        """
36

37
    def put(self, data, **kwargs):
38
        """
39
        Store data as GridFS file.
40

41
        Parameters:
42
        - data: file data (bytes or file-like object)
43
        - kwargs: same as new_file()
44

45
        Returns:
46
        ObjectId: File identifier
47
        """
48

49
    def get(self, file_id, session=None):
50
        """
51
        Retrieve file by ID.
52

53
        Parameters:
54
        - file_id: file identifier
55
        - session: optional ClientSession
56

57
        Returns:
58
        GridOut: File handle for reading
59

60
        Raises:
61
        NoFile: if file not found
62
        """
63

64
    def get_version(self, filename=None, version=-1, session=None, **kwargs):
65
        """
66
        Retrieve file by filename and version.
67

68
        Parameters:
69
        - filename: file name
70
        - version: version number (-1 for latest)
71
        - session: optional ClientSession
72

73
        Returns:
74
        GridOut: File handle for reading
75

76
        Raises:
77
        NoFile: if file not found
78
        """
79

80
    def get_last_version(self, filename=None, session=None, **kwargs):
81
        """
82
        Retrieve latest version of file by filename.
83

84
        Parameters:
85
        - filename: file name
86
        - session: optional ClientSession
87

88
        Returns:
89
        GridOut: File handle for reading
90

91
        Raises:
92
        NoFile: if file not found
93
        """
94

95
    def delete(self, file_id, session=None):
96
        """
97
        Delete file by ID.
98

99
        Parameters:
100
        - file_id: file identifier
101
        - session: optional ClientSession
102

103
        Note:
104
        Deleting a file that does not exist is treated as success; NoFile is not raised (unlike GridFSBucket.delete)
105
        """
106

107
    def list(self, session=None):
108
        """
109
        List stored filenames.
110

111
        Parameters:
112
        - session: optional ClientSession
113

114
        Returns:
115
        list: List of filenames
116
        """
117

118
    def find_one(self, filter=None, session=None, *args, **kwargs):
119
        """
120
        Find single file by filter.
121

122
        Parameters:
123
        - filter: query criteria
124
        - session: optional ClientSession
125

126
        Returns:
127
        GridOut: File handle or None
128
        """
129

130
    def find(self, *args, **kwargs):
131
        """
132
        Find files matching criteria.
133

134
        Parameters:
135
        - filter: query criteria
136
        - skip: number of files to skip
137
        - limit: maximum number of files
138
        - sort: sort specification
139
        - session: optional ClientSession
140

141
        Returns:
142
        GridOutCursor: Cursor for files
143
        """
144

145
    def exists(self, document_or_id=None, session=None, **kwargs):
146
        """
147
        Check if file exists.
148

149
        Parameters:
150
        - document_or_id: file ID or query document
151
        - session: optional ClientSession
152

153
        Returns:
154
        bool: True if file exists
155
        """
156
```
157

158
### GridFSBucket Interface
159

160
Modern GridFS interface with streaming support (recommended).
161

162
```python { .api }
163
class GridFSBucket:
164
    def __init__(
165
        self,
166
        db,
167
        bucket_name="fs",
168
        chunk_size_bytes=DEFAULT_CHUNK_SIZE,
169
        write_concern=None,
170
        read_preference=None,
171
        disable_md5=False
172
    ):
173
        """
174
        GridFS bucket for file operations.
175

176
        Parameters:
177
        - db: Database instance
178
        - bucket_name: bucket name (default "fs")
179
        - chunk_size_bytes: default chunk size
180
        - write_concern: write concern for operations
181
        - read_preference: read preference for operations
182
        - disable_md5: disable MD5 checksum calculation
183
        """
184

185
    def open_upload_stream(
186
        self,
187
        filename,
188
        chunk_size_bytes=None,
189
        metadata=None,
190
        session=None
191
    ):
192
        """
193
        Open upload stream for writing file.
194

195
        Parameters:
196
        - filename: file name
197
        - chunk_size_bytes: chunk size override
198
        - metadata: custom metadata dictionary
199
        - session: optional ClientSession
200

201
        Returns:
202
        GridIn: Upload stream
203
        """
204

205
    def open_upload_stream_with_id(
206
        self,
207
        file_id,
208
        filename,
209
        chunk_size_bytes=None,
210
        metadata=None,
211
        session=None
212
    ):
213
        """
214
        Open upload stream with specific file ID.
215

216
        Parameters:
217
        - file_id: file identifier
218
        - filename: file name
219
        - chunk_size_bytes: chunk size override
220
        - metadata: custom metadata dictionary
221
        - session: optional ClientSession
222

223
        Returns:
224
        GridIn: Upload stream
225
        """
226

227
    def upload_from_stream(
228
        self,
229
        filename,
230
        source,
231
        chunk_size_bytes=None,
232
        metadata=None,
233
        session=None
234
    ):
235
        """
236
        Upload file from stream.
237

238
        Parameters:
239
        - filename: file name
240
        - source: readable file-like object
241
        - chunk_size_bytes: chunk size override
242
        - metadata: custom metadata dictionary
243
        - session: optional ClientSession
244

245
        Returns:
246
        ObjectId: File identifier
247
        """
248

249
    def upload_from_stream_with_id(
250
        self,
251
        file_id,
252
        filename,
253
        source,
254
        chunk_size_bytes=None,
255
        metadata=None,
256
        session=None
257
    ):
258
        """
259
        Upload file from stream with specific ID.
260

261
        Parameters:
262
        - file_id: file identifier
263
        - filename: file name
264
        - source: readable file-like object
265
        - chunk_size_bytes: chunk size override
266
        - metadata: custom metadata dictionary
267
        - session: optional ClientSession
268
        """
269

270
    def open_download_stream(self, file_id, session=None):
271
        """
272
        Open download stream by file ID.
273

274
        Parameters:
275
        - file_id: file identifier
276
        - session: optional ClientSession
277

278
        Returns:
279
        GridOut: Download stream
280

281
        Raises:
282
        NoFile: if file not found
283
        """
284

285
    def download_to_stream(self, file_id, destination, session=None):
286
        """
287
        Download file to stream by ID.
288

289
        Parameters:
290
        - file_id: file identifier
291
        - destination: writable file-like object
292
        - session: optional ClientSession
293

294
        Raises:
295
        NoFile: if file not found
296
        """
297

298
    def delete(self, file_id, session=None):
299
        """
300
        Delete file by ID.
301

302
        Parameters:
303
        - file_id: file identifier
304
        - session: optional ClientSession
305

306
        Raises:
307
        NoFile: if file not found
308
        """
309

310
    def find(self, filter=None, session=None, **kwargs):
311
        """
312
        Find files matching criteria.
313

314
        Parameters:
315
        - filter: query criteria for files collection
316
        - batch_size: cursor batch size
317
        - limit: maximum number of files
318
        - skip: number of files to skip
319
        - sort: sort specification
320
        - session: optional ClientSession
321

322
        Returns:
323
        GridOutCursor: Cursor for files
324
        """
325

326
    def open_download_stream_by_name(
327
        self,
328
        filename,
329
        revision=-1,
330
        session=None
331
    ):
332
        """
333
        Open download stream by filename.
334

335
        Parameters:
336
        - filename: file name
337
        - revision: file revision (-1 for latest)
338
        - session: optional ClientSession
339

340
        Returns:
341
        GridOut: Download stream
342

343
        Raises:
344
        NoFile: if file not found
345
        """
346

347
    def download_to_stream_by_name(
348
        self,
349
        filename,
350
        destination,
351
        revision=-1,
352
        session=None
353
    ):
354
        """
355
        Download file to stream by name.
356

357
        Parameters:
358
        - filename: file name
359
        - destination: writable file-like object
360
        - revision: file revision (-1 for latest)
361
        - session: optional ClientSession
362

363
        Raises:
364
        NoFile: if file not found
365
        """
366

367
    def rename(self, file_id, new_filename, session=None):
368
        """
369
        Rename file.
370

371
        Parameters:
372
        - file_id: file identifier
373
        - new_filename: new file name
374
        - session: optional ClientSession
375

376
        Raises:
377
        NoFile: if file not found
378
        """
379
```
380

381
### GridFS File Objects
382

383
File objects for reading and writing GridFS files.
384

385
```python { .api }
386
class GridIn:
387
    def __init__(self, root_collection, session=None, disable_md5=False, **kwargs):
388
        """
389
        GridFS file for writing.
390

391
        Parameters:
392
        - root_collection: GridFS root collection
393
        - session: optional ClientSession
394
        - disable_md5: disable MD5 calculation
395
        - kwargs: file metadata
396
        """
397

398
    def write(self, data):
399
        """
400
        Write data to file.
401

402
        Parameters:
403
        - data: bytes to write
404
        """
405

406
    def writelines(self, lines):
407
        """
408
        Write sequence of bytes.
409

410
        Parameters:
411
        - lines: sequence of bytes
412
        """
413

414
    def close(self):
415
        """Close file and finalize upload."""
416

417
    def abort(self):
418
        """Abort upload and delete partial file."""
419

420
    @property
421
    def closed(self):
422
        """
423
        Check if file is closed.
424

425
        Returns:
426
        bool: True if closed
427
        """
428

429
    @property
430
    def _id(self):
431
        """
432
        File identifier.
433

434
        Returns:
435
        ObjectId: File ID
436
        """
437

438
    @property
439
    def filename(self):
440
        """
441
        File name.
442

443
        Returns:
444
        str: File name
445
        """
446

447
    @property
448
    def length(self):
449
        """
450
        File size in bytes.
451

452
        Returns:
453
        int: File size
454
        """
455

456
    @property
457
    def chunk_size(self):
458
        """
459
        Chunk size in bytes.
460

461
        Returns:
462
        int: Chunk size
463
        """
464

465
    @property
466
    def upload_date(self):
467
        """
468
        Upload completion timestamp.
469

470
        Returns:
471
        datetime: Upload date
472
        """
473

474
    @property
475
    def md5(self):
476
        """
477
        MD5 checksum (if enabled).
478

479
        Returns:
480
        str: MD5 hash or None
481
        """
482

483
    @property
484
    def metadata(self):
485
        """
486
        Custom metadata.
487

488
        Returns:
489
        dict: Metadata dictionary
490
        """
491

492
class GridOut:
493
    def __init__(self, root_collection, file_id=None, file_document=None, session=None):
494
        """
495
        GridFS file for reading.
496

497
        Parameters:
498
        - root_collection: GridFS root collection
499
        - file_id: file identifier
500
        - file_document: file document
501
        - session: optional ClientSession
502
        """
503

504
    def read(self, size=-1):
505
        """
506
        Read data from file.
507

508
        Parameters:
509
        - size: bytes to read (-1 for all)
510

511
        Returns:
512
        bytes: File data
513
        """
514

515
    def readline(self, size=-1):
516
        """
517
        Read line from file.
518

519
        Parameters:
520
        - size: maximum bytes to read
521

522
        Returns:
523
        bytes: Line data
524
        """
525

526
    def readlines(self):
527
        """
528
        Read all lines from file.
529

530
        Returns:
531
        list: List of lines as bytes
532
        """
533

534
    def seek(self, pos, whence=0):
535
        """
536
        Seek to file position.
537

538
        Parameters:
539
        - pos: position
540
        - whence: seek mode (0=absolute, 1=relative, 2=from end)
541
        """
542

543
    def tell(self):
544
        """
545
        Get current file position.
546

547
        Returns:
548
        int: Current position
549
        """
550

551
    def close(self):
552
        """Close file."""
553

554
    def __iter__(self):
555
        """Iterate over the file's data in chunk-sized pieces of bytes (PyMongo 3.x; iteration by line starts in 4.0)."""
556

557
    def __enter__(self):
558
        """Context manager entry."""
559

560
    def __exit__(self, exc_type, exc_val, exc_tb):
561
        """Context manager exit."""
562

563
    # Same properties as GridIn
564
    @property
565
    def _id(self): ...
566
    @property
567
    def filename(self): ...
568
    @property
569
    def length(self): ...
570
    @property
571
    def chunk_size(self): ...
572
    @property
573
    def upload_date(self): ...
574
    @property
575
    def md5(self): ...
576
    @property
577
    def metadata(self): ...
578

579
class GridOutCursor:
580
    def __init__(self, collection, filter=None, session=None, **kwargs):
581
        """
582
        Cursor for GridFS files.
583

584
        Parameters:
585
        - collection: files collection
586
        - filter: query criteria
587
        - session: optional ClientSession
588
        - kwargs: cursor options
589
        """
590

591
    def __iter__(self):
592
        """Iterate over files."""
593

594
    def __next__(self):
595
        """Get next file."""
596

597
    def next(self):
598
        """Get next file (Python 2 compatibility)."""
599

600
    def clone(self):
601
        """Clone cursor."""
602

603
    def count(self):
604
        """
605
        Count matching files.
606

607
        Returns:
608
        int: File count
609
        """
610
```
611

612
### Constants and Exceptions
613

614
GridFS-related constants and error handling.
615

616
```python { .api }
617
DEFAULT_CHUNK_SIZE: int  # Default chunk size in bytes (255 * 1024, i.e. 255 KiB)
618

619
class NoFile(Exception):
620
    """Raised when GridFS file is not found."""
621
```
622

623
## Usage Examples
624

625
### Basic GridFS Operations
626

627
```python
628
from pymongo import MongoClient
629
import gridfs
630
from io import BytesIO
631

632
client = MongoClient()
633
db = client.mydb
634
fs = gridfs.GridFS(db)
635

636
# Store a file
637
with open("image.jpg", "rb") as f:
638
    file_id = fs.put(f, filename="profile.jpg", contentType="image/jpeg")
639
print(f"Stored file with ID: {file_id}")
640

641
# Retrieve a file
642
grid_out = fs.get(file_id)
643
with open("downloaded.jpg", "wb") as f:
644
    f.write(grid_out.read())
645

646
print(f"Downloaded {grid_out.filename}, size: {grid_out.length} bytes")
647

648
# Store with metadata
649
file_id = fs.put(
650
    b"Hello, GridFS!",
651
    filename="greeting.txt",
652
    contentType="text/plain",
653
    metadata={"author": "Alice", "tags": ["greeting", "sample"]}
654
)
655

656
# Find and list files
657
for grid_file in fs.find({"metadata.author": "Alice"}):
658
    print(f"File: {grid_file.filename}, Author: {grid_file.metadata['author']}")
659

660
# Delete a file
661
fs.delete(file_id)
662
```
663

664
### GridFSBucket Operations (Recommended)
665

666
```python
667
from pymongo import MongoClient
668
import gridfs
669
from io import BytesIO
670

671
client = MongoClient()
672
db = client.mydb
673
bucket = gridfs.GridFSBucket(db, bucket_name="images")
674

675
# Upload from stream
676
with open("photo.jpg", "rb") as f:
677
    file_id = bucket.upload_from_stream(
678
        "user_photo.jpg",
679
        f,
680
        metadata={"user_id": 12345, "category": "profile"}
681
    )
682

683
print(f"Uploaded photo with ID: {file_id}")
684

685
# Download to stream
686
with open("downloaded_photo.jpg", "wb") as f:
687
    bucket.download_to_stream(file_id, f)
688

689
# Upload with custom chunk size for large files
690
with open("video.mp4", "rb") as f:
691
    file_id = bucket.upload_from_stream(
692
        "presentation.mp4",
693
        f,
694
        chunk_size_bytes=1024*1024,  # 1MB chunks
695
        metadata={"duration": 1800, "resolution": "1080p"}
696
    )
697

698
# Stream processing
699
upload_stream = bucket.open_upload_stream(
700
    "processed_data.csv",
701
    metadata={"processing_date": "2023-06-01"}
702
)
703

704
# Write data in chunks
705
for chunk in process_large_dataset():
706
    upload_stream.write(chunk.encode())
707

708
upload_stream.close()
709
print(f"Processed file ID: {upload_stream._id}")
710
```
711

712
### Advanced GridFS Usage
713

714
```python
715
import gridfs
716
from bson import ObjectId
717
from datetime import datetime
718

719
# Custom GridFS collection
720
fs = gridfs.GridFS(db, collection="documents")
721

722
# Store with specific file ID
723
custom_id = ObjectId()
724
fs.put(
725
    b"Important document content",
726
    _id=custom_id,
727
    filename="contract.pdf",
728
    contentType="application/pdf",
729
    metadata={
730
        "department": "legal",
731
        "confidential": True,
732
        "expires": datetime(2025, 12, 31)
733
    }
734
)
735

736
# Find files with complex queries
737
large_images = fs.find({
738
    "contentType": {"$regex": "^image/"},
739
    "length": {"$gt": 1024*1024},  # > 1MB
740
    "uploadDate": {"$gte": datetime(2023, 1, 1)}
741
}).sort("uploadDate", -1)
742

743
for img in large_images:
744
    print(f"Large image: {img.filename}, {img.length/1024/1024:.1f}MB")
745

746
# Version management by filename
747
versions = list(fs.find({"filename": "document.txt"}).sort("uploadDate", 1))
748
print(f"Found {len(versions)} versions of document.txt")
749

750
# Get latest version
751
latest = fs.get_last_version("document.txt")
752
print(f"Latest version uploaded: {latest.upload_date}")
753

754
# Stream reading
755
grid_out = fs.get(custom_id)  # the file stored above with an explicit _id
756
while True:
757
    chunk = grid_out.read(8192)  # Read 8KB chunks
758
    if not chunk:
759
        break
760
    process_chunk(chunk)
761
grid_out.close()
762
```
763

764
### GridFS with Transactions
765

766
```python
767
from datetime import datetime, timedelta
from pymongo import MongoClient
import gridfs
768
from pymongo.errors import PyMongoError
769

770
client = MongoClient()
771
db = client.mydb
772
bucket = gridfs.GridFSBucket(db)
773

774
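# GridFS operations in transaction
# NOTE: verify against your PyMongo version before relying on this pattern --
# PyMongo's GridFS classes reject a session that has an active transaction
# (InvalidOperation: "GridFS does not support multi-document transactions").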
775
with client.start_session() as session:
776
    with session.start_transaction():
777
        try:
778
            # Upload file
779
            with open("data.json", "rb") as f:
780
                file_id = bucket.upload_from_stream(
781
                    "backup.json",
782
                    f,
783
                    session=session
784
                )
785
            
786
            # Update metadata in related collection
787
            db.backups.insert_one({
788
                "file_id": file_id,
789
                "created_date": datetime.now(),
790
                "status": "completed"
791
            }, session=session)
792
            
793
            print("Backup created successfully")
794
            
795
        except PyMongoError as e:
796
            print(f"Backup failed: {e}")
797
            raise  # Will abort transaction
798

799
# Cleanup old backups
800
def cleanup_old_backups(session):
801
    """Remove backups older than 30 days."""
802
    cutoff_date = datetime.now() - timedelta(days=30)
803
    
804
    old_backups = db.backups.find(
805
        {"created_date": {"$lt": cutoff_date}},
806
        session=session
807
    )
808
    
809
    for backup in old_backups:
810
        # Delete GridFS file
811
        bucket.delete(backup["file_id"], session=session)
812
        # Delete metadata
813
        db.backups.delete_one({"_id": backup["_id"]}, session=session)
814

815
# Run cleanup in transaction
816
with client.start_session() as session:
817
    session.with_transaction(cleanup_old_backups)
818
```

Version

Tile

Files

gridfs-storage.md (docs/)

gridfs-storage.mddocs/