Fiona reads and writes spatial data files.

Advanced collection handling including filtering, iteration patterns, schema management, and batch operations for efficient processing of large geospatial datasets. Collections provide the primary interface for reading and writing geospatial data files, with extensive options for controlling data access and modification.

The main interface for reading and writing vector geospatial data. Collections act as file-like objects that can iterate over features, write new features, and manage dataset metadata.
class Collection:
    """File-like interface to a vector geospatial dataset.

    A Collection iterates over features, writes new features, and exposes
    dataset metadata (driver, schema, CRS, bounds).
    """

    def __init__(
        self,
        path,
        mode="r",
        driver=None,
        schema=None,
        crs=None,
        encoding=None,
        layer=None,
        vsi=None,
        archive=None,
        enabled_drivers=None,
        crs_wkt=None,
        ignore_fields=None,
        ignore_geometry=False,
        include_fields=None,
        wkt_version=None,
        allow_unsupported_drivers=False,
        **kwargs
    ):
        """
        Create a collection for a vector dataset.

        Parameters:
        - path: str or Path, dataset path
        - mode: str, 'r' (read), 'a' (append), or 'w' (write)
        - driver: str, OGR driver name
        - schema: dict, data schema (required for write mode)
        - crs: str or dict, coordinate reference system
        - encoding: str, text encoding for the dataset
        - layer: int or str, layer index or name for multi-layer datasets
        - vsi: str, virtual file system identifier
        - archive: str, archive file path
        - enabled_drivers: list, restrict to specific drivers
        - crs_wkt: str, CRS in WKT format
        - ignore_fields: list, field names to skip when reading
        - ignore_geometry: bool, skip geometry when reading
        - include_fields: list, only include these fields when reading
        - wkt_version: WktVersion, WKT format version
        - allow_unsupported_drivers: bool, allow experimental drivers
        - kwargs: dict, additional driver-specific options
        """

    def filter(self, *args, **kwds):
        """
        Return filtered iterator over records.

        Parameters:
        - args: positional arguments for spatial or attribute filters
        - kwds: keyword arguments for filtering options

        Returns:
        Iterator over filtered feature records
        """

    def items(self, *args, **kwds):
        """
        Return iterator over FID, record pairs.

        Parameters:
        - args: positional arguments for filtering
        - kwds: keyword arguments for filtering options

        Returns:
        Iterator over (feature_id, feature_record) tuples
        """

    def keys(self, *args, **kwds):
        """
        Return iterator over feature IDs.

        Parameters:
        - args: positional arguments for filtering
        - kwds: keyword arguments for filtering options

        Returns:
        Iterator over feature ID values
        """

    def write(self, record):
        """
        Write a single feature record.

        Parameters:
        - record: dict, feature record to write

        Raises:
        - ValueError: If record doesn't match schema
        - UnsupportedOperation: If collection not opened for writing
        """

    def writerecords(self, records):
        """
        Write multiple feature records efficiently.

        Parameters:
        - records: iterable of dict, feature records to write

        Raises:
        - ValueError: If any record doesn't match schema
        - UnsupportedOperation: If collection not opened for writing
        """

    def close(self):
        """Close the collection and release resources."""

    def flush(self):
        """Flush pending writes to disk."""

    def validate_record(self, record):
        """
        Validate a feature record against the collection schema.

        Parameters:
        - record: dict, feature record to validate

        Returns:
        bool: True if valid

        Raises:
        - SchemaError: If record doesn't match schema
        """

    @property
    def driver(self):
        """Get the OGR driver name."""

    @property
    def schema(self):
        """Get the data schema dictionary."""

    @property
    def crs(self):
        """Get the coordinate reference system."""

    @property
    def bounds(self):
        """Get spatial bounds as (minx, miny, maxx, maxy)."""

    @property
    def meta(self):
        """Get metadata dictionary."""

    @property
    def profile(self):
        """Get profile dictionary (alias for meta)."""

    @property
    def closed(self):
        """Check if collection is closed."""


import fiona
from fiona.collection import Collection
# Reading with iteration
with Collection('data.shp', 'r') as collection:
print(f"Schema: {collection.schema}")
print(f"CRS: {collection.crs}")
print(f"Bounds: {collection.bounds}")
# Iterate over all features
for feature in collection:
print(f"Feature {feature['id']}: {feature['properties']}")
# Writing new collection
schema = {
'geometry': 'Point',
'properties': {
'name': 'str:50',
'population': 'int',
'elevation': 'float'
}
}
with Collection('cities.geojson', 'w', driver='GeoJSON',
schema=schema, crs='EPSG:4326') as collection:
cities = [
{
'geometry': {'type': 'Point', 'coordinates': [-122.4, 37.8]},
'properties': {'name': 'San Francisco', 'population': 875000, 'elevation': 52.0}
},
{
'geometry': {'type': 'Point', 'coordinates': [-74.0, 40.7]},
'properties': {'name': 'New York', 'population': 8400000, 'elevation': 10.0}
}
]
# Write multiple records efficiently
collection.writerecords(cities)
# Field filtering on read
with Collection('detailed_data.shp', 'r',
include_fields=['name', 'category']) as collection:
for feature in collection:
# Only specified fields are loaded, reducing memory usage
print(feature['properties']) # Only has 'name' and 'category'A specialized collection backed by a bytes buffer, useful for working with data in memory or from network sources.
class BytesCollection(Collection):
    """A Collection backed by an in-memory bytes buffer instead of a file path."""

    def __init__(self, bytesbuf, **kwds):
        """
        Create collection from bytes buffer.

        Parameters:
        - bytesbuf: bytes, buffer containing dataset data
        - kwds: dict, additional Collection parameters
        """


from fiona.collection import BytesCollection
import requests
# Read GeoJSON from HTTP response
response = requests.get('https://example.com/data.geojson')
geojson_bytes = response.content
with BytesCollection(geojson_bytes) as collection:
print(f"Driver: {collection.driver}")
print(f"Feature count: {len(collection)}")
for feature in collection:
print(f"Feature: {feature['id']}")
# Work with shapefile bytes (from zip archive)
import zipfile
import io
with zipfile.ZipFile('data.zip', 'r') as archive:
shp_bytes = archive.read('data.shp')
shx_bytes = archive.read('data.shx')
dbf_bytes = archive.read('data.dbf')
# Create virtual file system paths
vsi_path = f'/vsimem/temp.shp'
with BytesCollection(shp_bytes, vsi=vsi_path) as collection:
# Process shapefile from memory
for feature in collection:
process_feature(feature)
# Convert between formats in memory
def convert_bytes_format(input_bytes, from_driver, to_driver):
"""Convert geospatial data between formats using bytes."""
output_buffer = io.BytesIO()
# Read from input bytes
with BytesCollection(input_bytes) as input_collection:
schema = input_collection.schema
crs = input_collection.crs
# Write to output buffer
with fiona.open(output_buffer, 'w', driver=to_driver,
schema=schema, crs=crs) as output:
for feature in input_collection:
output.write(feature)
return output_buffer.getvalue()
# Example: Convert GeoJSON bytes to Shapefile bytes
geojson_bytes = b'{"type": "FeatureCollection", "features": [...]}'
shapefile_bytes = convert_bytes_format(geojson_bytes, 'GeoJSON', 'ESRI Shapefile')Collections provide several features for optimizing performance with large datasets:
# Use field filtering to reduce memory usage
with fiona.open('large_dataset.shp', 'r',
include_fields=['id', 'name']) as collection:
# Only loads specified fields, faster and uses less memory
for feature in collection:
process_minimal_feature(feature)
# Skip geometry for attribute-only processing
with fiona.open('attribute_analysis.shp', 'r',
ignore_geometry=True) as collection:
# Much faster when you don't need spatial data
for feature in collection:
analyze_attributes(feature['properties'])
# Batch writing for better performance
features = generate_many_features() # Generator or large list
with fiona.open('output.geojson', 'w', **write_params) as collection:
# Write in batches rather than one-by-one
collection.writerecords(features)Install with Tessl CLI
npx tessl i tessl/pypi-fionadocs
evals
scenario-1
scenario-2
scenario-3
scenario-4
scenario-5
scenario-6
scenario-7
scenario-8
scenario-9
scenario-10