0
# Collection Management
1
2
This document covers advanced collection handling: filtering, iteration patterns, schema management, and batch operations for efficient processing of large geospatial datasets. Collections are the primary interface for reading and writing geospatial data files, and they offer extensive options for controlling data access and modification.
3
4
## Capabilities
5
6
### Collection Class
7
8
The main interface for reading and writing vector geospatial data. Collections act as file-like objects that can iterate over features, write new features, and manage dataset metadata.
9
10
```python { .api }
11
class Collection:
12
def __init__(
13
self,
14
path,
15
mode="r",
16
driver=None,
17
schema=None,
18
crs=None,
19
encoding=None,
20
layer=None,
21
vsi=None,
22
archive=None,
23
enabled_drivers=None,
24
crs_wkt=None,
25
ignore_fields=None,
26
ignore_geometry=False,
27
include_fields=None,
28
wkt_version=None,
29
allow_unsupported_drivers=False,
30
**kwargs
31
):
32
"""
33
Create a collection for a vector dataset.
34
35
Parameters:
36
- path: str or Path, dataset path
37
- mode: str, 'r' (read), 'a' (append), or 'w' (write)
38
- driver: str, OGR driver name
39
- schema: dict, data schema (required for write mode)
40
- crs: str or dict, coordinate reference system
41
- encoding: str, text encoding for the dataset
42
- layer: int or str, layer index or name for multi-layer datasets
43
- vsi: str, virtual file system identifier
44
- archive: str, archive file path
45
- enabled_drivers: list, restrict to specific drivers
46
- crs_wkt: str, CRS in WKT format
47
- ignore_fields: list, field names to skip when reading
48
- ignore_geometry: bool, skip geometry when reading
49
- include_fields: list, only include these fields when reading
50
- wkt_version: WktVersion, WKT format version
51
- allow_unsupported_drivers: bool, allow experimental drivers
52
- kwargs: dict, additional driver-specific options
53
"""
54
55
def filter(self, *args, **kwds):
56
"""
57
Return filtered iterator over records.
58
59
Parameters:
60
- args: positional arguments for spatial or attribute filters
61
- kwds: keyword arguments for filtering options
62
63
Returns:
64
Iterator over filtered feature records
65
"""
66
67
def items(self, *args, **kwds):
68
"""
69
Return an iterator over (FID, record) pairs.
70
71
Parameters:
72
- args: positional arguments for filtering
73
- kwds: keyword arguments for filtering options
74
75
Returns:
76
Iterator over (feature_id, feature_record) tuples
77
"""
78
79
def keys(self, *args, **kwds):
80
"""
81
Return iterator over feature IDs.
82
83
Parameters:
84
- args: positional arguments for filtering
85
- kwds: keyword arguments for filtering options
86
87
Returns:
88
Iterator over feature ID values
89
"""
90
91
def write(self, record):
92
"""
93
Write a single feature record.
94
95
Parameters:
96
- record: dict, feature record to write
97
98
Raises:
99
- ValueError: If record doesn't match schema
100
- UnsupportedOperation: If collection not opened for writing
101
"""
102
103
def writerecords(self, records):
104
"""
105
Write multiple feature records efficiently.
106
107
Parameters:
108
- records: iterable of dict, feature records to write
109
110
Raises:
111
- ValueError: If any record doesn't match schema
112
- UnsupportedOperation: If collection not opened for writing
113
"""
114
115
def close(self):
116
"""Close the collection and release resources."""
117
118
def flush(self):
119
"""Flush pending writes to disk."""
120
121
def validate_record(self, record):
122
"""
123
Validate a feature record against the collection schema.
124
125
Parameters:
126
- record: dict, feature record to validate
127
128
Returns:
129
bool: True if valid
130
131
Raises:
132
- SchemaError: If record doesn't match schema
133
"""
134
135
@property
136
def driver(self):
137
"""Get the OGR driver name."""
138
139
@property
140
def schema(self):
141
"""Get the data schema dictionary."""
142
143
@property
144
def crs(self):
145
"""Get the coordinate reference system."""
146
147
@property
148
def bounds(self):
149
"""Get spatial bounds as (minx, miny, maxx, maxy)."""
150
151
@property
152
def meta(self):
153
"""Get metadata dictionary."""
154
155
@property
156
def profile(self):
157
"""Get profile dictionary (alias for meta)."""
158
159
@property
160
def closed(self):
161
"""Check if collection is closed."""
162
```
163
164
#### Usage Examples
165
166
```python
167
import fiona
168
from fiona.collection import Collection
169
170
# Reading with iteration
171
with Collection('data.shp', 'r') as collection:
172
print(f"Schema: {collection.schema}")
173
print(f"CRS: {collection.crs}")
174
print(f"Bounds: {collection.bounds}")
175
176
# Iterate over all features
177
for feature in collection:
178
print(f"Feature {feature['id']}: {feature['properties']}")
179
180
# Writing new collection
181
schema = {
182
'geometry': 'Point',
183
'properties': {
184
'name': 'str:50',
185
'population': 'int',
186
'elevation': 'float'
187
}
188
}
189
190
with Collection('cities.geojson', 'w', driver='GeoJSON',
191
schema=schema, crs='EPSG:4326') as collection:
192
193
cities = [
194
{
195
'geometry': {'type': 'Point', 'coordinates': [-122.4, 37.8]},
196
'properties': {'name': 'San Francisco', 'population': 875000, 'elevation': 52.0}
197
},
198
{
199
'geometry': {'type': 'Point', 'coordinates': [-74.0, 40.7]},
200
'properties': {'name': 'New York', 'population': 8400000, 'elevation': 10.0}
201
}
202
]
203
204
# Write multiple records efficiently
205
collection.writerecords(cities)
206
207
# Field filtering on read
208
with Collection('detailed_data.shp', 'r',
209
include_fields=['name', 'category']) as collection:
210
for feature in collection:
211
# Only specified fields are loaded, reducing memory usage
212
print(feature['properties']) # Only has 'name' and 'category'
213
```
214
215
### BytesCollection Class
216
217
A specialized collection backed by a bytes buffer, useful for working with data in memory or from network sources.
218
219
```python { .api }
220
class BytesCollection(Collection):
221
def __init__(self, bytesbuf, **kwds):
222
"""
223
Create collection from bytes buffer.
224
225
Parameters:
226
- bytesbuf: bytes, buffer containing dataset data
227
- kwds: dict, additional Collection parameters
228
"""
229
```
230
231
#### Usage Examples
232
233
```python
234
from fiona.collection import BytesCollection
235
import requests
236
237
# Read GeoJSON from HTTP response
238
response = requests.get('https://example.com/data.geojson')
239
geojson_bytes = response.content
240
241
with BytesCollection(geojson_bytes) as collection:
242
print(f"Driver: {collection.driver}")
243
print(f"Feature count: {len(collection)}")
244
for feature in collection:
245
print(f"Feature: {feature['id']}")
246
247
# Work with shapefile bytes (from zip archive)
248
import zipfile
249
import io
250
251
with zipfile.ZipFile('data.zip', 'r') as archive:
252
shp_bytes = archive.read('data.shp')
253
shx_bytes = archive.read('data.shx')
254
dbf_bytes = archive.read('data.dbf')
255
256
# Create virtual file system paths
257
vsi_path = '/vsimem/temp.shp'
258
with BytesCollection(shp_bytes, vsi=vsi_path) as collection:
259
# Process shapefile from memory
260
for feature in collection:
261
process_feature(feature)
262
263
# Convert between formats in memory
264
def convert_bytes_format(input_bytes, from_driver, to_driver):
265
"""Convert geospatial data between formats using bytes."""
266
output_buffer = io.BytesIO()
267
268
# Read from input bytes
269
with BytesCollection(input_bytes) as input_collection:
270
schema = input_collection.schema
271
crs = input_collection.crs
272
273
# Write to output buffer
274
with fiona.open(output_buffer, 'w', driver=to_driver,
275
schema=schema, crs=crs) as output:
276
for feature in input_collection:
277
output.write(feature)
278
279
return output_buffer.getvalue()
280
281
# Example: Convert GeoJSON bytes to Shapefile bytes
282
geojson_bytes = b'{"type": "FeatureCollection", "features": [...]}'
283
shapefile_bytes = convert_bytes_format(geojson_bytes, 'GeoJSON', 'ESRI Shapefile')
284
```
285
286
## Performance Optimization
287
288
Collections provide several features for optimizing performance with large datasets:
289
290
```python
291
# Use field filtering to reduce memory usage
292
with fiona.open('large_dataset.shp', 'r',
293
include_fields=['id', 'name']) as collection:
294
# Only loads specified fields, faster and uses less memory
295
for feature in collection:
296
process_minimal_feature(feature)
297
298
# Skip geometry for attribute-only processing
299
with fiona.open('attribute_analysis.shp', 'r',
300
ignore_geometry=True) as collection:
301
# Much faster when you don't need spatial data
302
for feature in collection:
303
analyze_attributes(feature['properties'])
304
305
# Batch writing for better performance
306
features = generate_many_features() # Generator or large list
307
308
with fiona.open('output.geojson', 'w', **write_params) as collection:
309
# Pass the whole iterable to writerecords() instead of calling write() once per feature
310
collection.writerecords(features)
311
```