CtrlK
Blog Docs Log in Get started
Tessl Logo

tessl/pypi-fiona

Fiona reads and writes spatial data files

88

1.10x
Overview
Eval results
Files

docs/collection-management.md

Collection Management

Advanced collection handling including filtering, iteration patterns, schema management, and batch operations for efficient processing of large geospatial datasets. Collections provide the primary interface for reading and writing geospatial data files with extensive options for controlling data access and modification.

Capabilities

Collection Class

The main interface for reading and writing vector geospatial data. Collections act as file-like objects that can iterate over features, write new features, and manage dataset metadata.

class Collection:
    """File-like interface for reading and writing a vector dataset.

    This listing is an API outline: every method body below consists of a
    docstring only. The usage examples in this document additionally use
    collections as context managers (``with Collection(...) as c``), iterate
    over them to obtain feature records, and call ``len()`` on them; those
    protocols are not declared in this outline.
    """
    def __init__(
        self,
        path,
        mode="r",
        driver=None,
        schema=None,
        crs=None,
        encoding=None,
        layer=None,
        vsi=None,
        archive=None,
        enabled_drivers=None,
        crs_wkt=None,
        ignore_fields=None,
        ignore_geometry=False,
        include_fields=None,
        wkt_version=None,
        allow_unsupported_drivers=False,
        **kwargs
    ):
        """
        Create a collection for a vector dataset.

        Parameters:
        - path: str or Path, dataset path
        - mode: str, 'r' (read), 'a' (append), or 'w' (write); defaults to 'r'
        - driver: str, OGR driver name (the examples use 'GeoJSON')
        - schema: dict, data schema (required for write mode); the examples
          use {'geometry': <geometry type>, 'properties': {<field>: <type>}}
        - crs: str or dict, coordinate reference system (the examples use
          'EPSG:4326')
        - encoding: str, text encoding for the dataset
        - layer: int or str, layer index or name for multi-layer datasets
        - vsi: str, virtual file system identifier
        - archive: str, archive file path
        - enabled_drivers: list, restrict to specific drivers
        - crs_wkt: str, CRS in WKT format
        - ignore_fields: list, field names to skip when reading
        - ignore_geometry: bool, skip geometry when reading; defaults to False
        - include_fields: list, only include these fields when reading
          (NOTE(review): presumably cannot be combined with ignore_fields;
          confirm)
        - wkt_version: WktVersion, WKT format version
        - allow_unsupported_drivers: bool, allow experimental drivers;
          defaults to False
        - kwargs: dict, additional driver-specific options
        """
    
    def filter(self, *args, **kwds):
        """
        Return filtered iterator over records.
        
        Parameters:
        - args: positional arguments for spatial or attribute filters
        - kwds: keyword arguments for filtering options
        
        Returns:
        Iterator over filtered feature records

        NOTE(review): this outline does not name the accepted arguments.
        Upstream Fiona appears to take slice-style positionals (stop, or
        start, stop[, step]) and the keywords bbox, mask and where; confirm
        against the Fiona API reference.
        """
    
    def items(self, *args, **kwds):
        """
        Return iterator over FID, record pairs.
        
        Parameters:
        - args: positional arguments for filtering
        - kwds: keyword arguments for filtering options
        
        Returns:
        Iterator over (feature_id, feature_record) tuples

        NOTE(review): presumably accepts the same arguments as filter();
        confirm.
        """
    
    def keys(self, *args, **kwds):
        """
        Return iterator over feature IDs.
        
        Parameters:
        - args: positional arguments for filtering
        - kwds: keyword arguments for filtering options
        
        Returns:
        Iterator over feature ID values

        NOTE(review): presumably accepts the same arguments as filter();
        confirm.
        """
    
    def write(self, record):
        """
        Write a single feature record.
        
        Parameters:
        - record: dict, feature record to write; the examples pass dicts
          with 'geometry' and 'properties' keys
        
        Raises:
        - ValueError: If record doesn't match schema
        - UnsupportedOperation: If collection not opened for writing

        NOTE(review): the exception types listed here are not verifiable
        from this outline; confirm against the Fiona version in use.
        """
    
    def writerecords(self, records):
        """
        Write multiple feature records efficiently.
        
        Parameters:
        - records: iterable of dict, feature records to write
        
        Raises:
        - ValueError: If any record doesn't match schema
        - UnsupportedOperation: If collection not opened for writing

        NOTE(review): the exception types listed here are not verifiable
        from this outline; confirm against the Fiona version in use.
        """
    
    def close(self):
        """Close the collection and release resources."""
    
    def flush(self):
        """Flush pending writes to disk."""
    
    def validate_record(self, record):
        """
        Validate a feature record against the collection schema.
        
        Parameters:
        - record: dict, feature record to validate
        
        Returns:
        bool: True if valid
        
        Raises:
        - SchemaError: If record doesn't match schema

        NOTE(review): the two clauses above overlap. If a mismatch raises,
        the return value can only ever be True. Upstream Fiona appears to
        return False for a mismatch instead of raising; confirm which
        contract applies.
        """
    
    @property
    def driver(self):
        """Get the OGR driver name."""
    
    @property
    def schema(self):
        """Get the data schema dictionary."""
    
    @property
    def crs(self):
        """Get the coordinate reference system."""
    
    @property
    def bounds(self):
        """Get spatial bounds as (minx, miny, maxx, maxy)."""
    
    @property
    def meta(self):
        """Get metadata dictionary."""
    
    @property 
    def profile(self):
        """Get profile dictionary (alias for meta)."""
    
    @property
    def closed(self):
        """Check if collection is closed."""

Usage Examples

import fiona
from fiona.collection import Collection

# Open a dataset read-only and walk through its features
with Collection('data.shp', 'r') as src:
    # Dataset-level metadata is exposed as properties of the open collection
    print(f"Schema: {src.schema}")
    print(f"CRS: {src.crs}")
    print(f"Bounds: {src.bounds}")

    # Iterating the collection yields one feature record at a time
    for feat in src:
        feat_id = feat['id']
        feat_props = feat['properties']
        print(f"Feature {feat_id}: {feat_props}")

# Create a new dataset
# The schema names the geometry type and the type of every property field
schema = {
    'geometry': 'Point',
    'properties': {'name': 'str:50', 'population': 'int', 'elevation': 'float'},
}

# Records to store; each one carries a geometry and the properties declared above
san_francisco = {
    'geometry': {'type': 'Point', 'coordinates': [-122.4, 37.8]},
    'properties': {'name': 'San Francisco', 'population': 875000, 'elevation': 52.0},
}
new_york = {
    'geometry': {'type': 'Point', 'coordinates': [-74.0, 40.7]},
    'properties': {'name': 'New York', 'population': 8400000, 'elevation': 10.0},
}
cities = [san_francisco, new_york]

with Collection(
    'cities.geojson', 'w', driver='GeoJSON', schema=schema, crs='EPSG:4326'
) as dst:
    # A single writerecords() call stores the whole batch
    dst.writerecords(cities)

# Restrict which attribute fields are read
wanted_fields = ['name', 'category']
with Collection('detailed_data.shp', 'r', include_fields=wanted_fields) as src:
    for feat in src:
        # Properties hold only 'name' and 'category'; the remaining fields
        # are never loaded, which reduces memory usage
        print(feat['properties'])

BytesCollection Class

A specialized collection backed by a bytes buffer, useful for working with data in memory or from network sources.

class BytesCollection(Collection):
    """Collection whose dataset is read from an in-memory bytes buffer.

    This listing is an API outline; the constructor body is a docstring only.
    """
    def __init__(self, bytesbuf, **kwds):
        """
        Create collection from bytes buffer.
        
        Parameters:
        - bytesbuf: bytes, buffer containing dataset data
        - kwds: dict, additional Collection parameters

        NOTE(review): upstream Fiona appears to choose the ``vsi`` value
        itself when it calls the parent constructor, so passing ``vsi=`` in
        kwds would presumably collide; confirm before relying on it.
        """

Usage Examples

from fiona.collection import BytesCollection
import requests

# Read GeoJSON from HTTP response
# A timeout keeps the request from hanging indefinitely on an unresponsive server
response = requests.get('https://example.com/data.geojson', timeout=30)
# Fail on 4xx/5xx so an error page is never handed to the GeoJSON reader
response.raise_for_status()
geojson_bytes = response.content

with BytesCollection(geojson_bytes) as collection:
    print(f"Driver: {collection.driver}")
    print(f"Feature count: {len(collection)}")
    for feature in collection:
        print(f"Feature: {feature['id']}")

# Work with shapefile bytes (from zip archive)
import zipfile
import io

# A shapefile is several files (.shp, .shx, .dbf), so the .shp member alone
# cannot be opened. Check that the archive holds every part, then hand the
# whole archive to BytesCollection.
required_members = {'data.shp', 'data.shx', 'data.dbf'}
with zipfile.ZipFile('data.zip', 'r') as archive:
    missing_members = required_members - set(archive.namelist())
if missing_members:
    raise FileNotFoundError(
        f"data.zip is missing shapefile parts: {sorted(missing_members)}"
    )

with open('data.zip', 'rb') as zip_file:
    zip_bytes = zip_file.read()

# No vsi= argument is passed here.
# NOTE(review): this relies on BytesCollection recognising a zip buffer and
# picking the virtual file system itself; confirm against the Fiona version
# in use.
with BytesCollection(zip_bytes) as collection:
    # Process shapefile from memory; process_feature is a reader-supplied callback
    for feature in collection:
        process_feature(feature)

# Convert between formats in memory
def convert_bytes_format(input_bytes, from_driver, to_driver):
    """
    Convert geospatial data between formats using bytes.

    Parameters:
    - input_bytes: bytes, the encoded source dataset
    - from_driver: str, OGR driver name used to read input_bytes
      (e.g. 'GeoJSON')
    - to_driver: str, OGR driver name used to encode the result

    Returns:
    bytes: everything written to the in-memory output buffer

    NOTE(review): the result is a single buffer, so a multi-file format such
    as 'ESRI Shapefile' presumably cannot be captured completely; confirm
    before using one as to_driver.
    """
    output_buffer = io.BytesIO()

    # Name the input driver explicitly rather than leaving from_driver unused
    # and relying on format detection.
    with BytesCollection(input_bytes, driver=from_driver) as input_collection:
        # The output reuses the input's schema and CRS unchanged
        with fiona.open(
            output_buffer,
            'w',
            driver=to_driver,
            schema=input_collection.schema,
            crs=input_collection.crs,
        ) as output:
            # One batched call instead of one write() per feature
            output.writerecords(input_collection)

    return output_buffer.getvalue()

# Example: Convert GeoJSON bytes to GeoPackage bytes
# The input must be parseable GeoJSON, so a real feature is spelled out here.
geojson_bytes = (
    b'{"type": "FeatureCollection", "features": ['
    b'{"type": "Feature", "properties": {"name": "Origin"}, '
    b'"geometry": {"type": "Point", "coordinates": [0.0, 0.0]}}'
    b']}'
)
# GeoPackage is a single-file format, so one buffer can hold the whole result.
# NOTE(review): 'ESRI Shapefile' output would presumably contain only part of
# the dataset, because a shapefile spans several files; confirm.
gpkg_bytes = convert_bytes_format(geojson_bytes, 'GeoJSON', 'GPKG')

Performance Optimization

Collections provide several features for optimizing performance with large datasets:

# Load only the attribute fields that are actually needed
needed_fields = ['id', 'name']
with fiona.open('large_dataset.shp', 'r', include_fields=needed_fields) as src:
    # All other fields are skipped, which is faster and uses less memory
    for feat in src:
        # process_minimal_feature is a reader-supplied callback
        process_minimal_feature(feat)

# Attribute-only processing: leave geometries unread
with fiona.open('attribute_analysis.shp', 'r', ignore_geometry=True) as src:
    # Skipping spatial data makes reading much faster when it is not needed
    for feat in src:
        # analyze_attributes is a reader-supplied callback
        analyze_attributes(feat['properties'])

# Write in one batch for better performance
# generate_many_features and write_params are placeholders the reader
# supplies: a generator or large list of records, and the keyword arguments
# for opening the output (for example driver, schema and crs).
features = generate_many_features()

with fiona.open('output.geojson', 'w', **write_params) as dst:
    # A single writerecords() call replaces one write() per feature
    dst.writerecords(features)

Install with Tessl CLI

npx tessl i tessl/pypi-fiona

docs

cli.md

collection-management.md

crs.md

data-model.md

environment.md

file-io.md

index.md

schema.md

transforms.md

utilities.md

tile.json