or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli.md, collection-management.md, crs.md, data-model.md, environment.md, file-io.md, index.md, schema.md, transforms.md, utilities.md

docs/collection-management.md

0

# Collection Management

1

2

Advanced collection handling including filtering, iteration patterns, schema management, and batch operations for efficient processing of large geospatial datasets. Collections provide the primary interface for reading and writing geospatial data files with extensive options for controlling data access and modification.

3

4

## Capabilities

5

6

### Collection Class

7

8

The main interface for reading and writing vector geospatial data. Collections act as file-like objects that can iterate over features, write new features, and manage dataset metadata.

9

10

```python { .api }

11

class Collection:

12

def __init__(

13

self,

14

path,

15

mode="r",

16

driver=None,

17

schema=None,

18

crs=None,

19

encoding=None,

20

layer=None,

21

vsi=None,

22

archive=None,

23

enabled_drivers=None,

24

crs_wkt=None,

25

ignore_fields=None,

26

ignore_geometry=False,

27

include_fields=None,

28

wkt_version=None,

29

allow_unsupported_drivers=False,

30

**kwargs

31

):

32

"""

33

Create a collection for a vector dataset.

34

35

Parameters:

36

- path: str or Path, dataset path

37

- mode: str, 'r' (read), 'a' (append), or 'w' (write)

38

- driver: str, OGR driver name

39

- schema: dict, data schema (required for write mode)

40

- crs: str or dict, coordinate reference system

41

- encoding: str, text encoding for the dataset

42

- layer: int or str, layer index or name for multi-layer datasets

43

- vsi: str, virtual file system identifier

44

- archive: str, archive file path

45

- enabled_drivers: list, restrict to specific drivers

46

- crs_wkt: str, CRS in WKT format

47

- ignore_fields: list, field names to skip when reading

48

- ignore_geometry: bool, skip geometry when reading

49

- include_fields: list, only include these fields when reading

50

- wkt_version: WktVersion, WKT format version

51

- allow_unsupported_drivers: bool, allow experimental drivers

52

- kwargs: dict, additional driver-specific options

53

"""

54

55

def filter(self, *args, **kwds):

56

"""

57

Return filtered iterator over records.

58

59

Parameters:

60

- args: positional arguments for spatial or attribute filters

61

- kwds: keyword arguments for filtering options

62

63

Returns:

64

Iterator over filtered feature records

65

"""

66

67

def items(self, *args, **kwds):

68

"""

69

Return iterator over FID, record pairs.

70

71

Parameters:

72

- args: positional arguments for filtering

73

- kwds: keyword arguments for filtering options

74

75

Returns:

76

Iterator over (feature_id, feature_record) tuples

77

"""

78

79

def keys(self, *args, **kwds):

80

"""

81

Return iterator over feature IDs.

82

83

Parameters:

84

- args: positional arguments for filtering

85

- kwds: keyword arguments for filtering options

86

87

Returns:

88

Iterator over feature ID values

89

"""

90

91

def write(self, record):

92

"""

93

Write a single feature record.

94

95

Parameters:

96

- record: dict, feature record to write

97

98

Raises:

99

- ValueError: If record doesn't match schema

100

- UnsupportedOperation: If collection not opened for writing

101

"""

102

103

def writerecords(self, records):

104

"""

105

Write multiple feature records efficiently.

106

107

Parameters:

108

- records: iterable of dict, feature records to write

109

110

Raises:

111

- ValueError: If any record doesn't match schema

112

- UnsupportedOperation: If collection not opened for writing

113

"""

114

115

def close(self):

116

"""Close the collection and release resources."""

117

118

def flush(self):

119

"""Flush pending writes to disk."""

120

121

def validate_record(self, record):

122

"""

123

Validate a feature record against the collection schema.

124

125

Parameters:

126

- record: dict, feature record to validate

127

128

Returns:

129

bool: True if valid

130

131

Raises:

132

- SchemaError: If record doesn't match schema

133

"""

134

135

@property

136

def driver(self):

137

"""Get the OGR driver name."""

138

139

@property

140

def schema(self):

141

"""Get the data schema dictionary."""

142

143

@property

144

def crs(self):

145

"""Get the coordinate reference system."""

146

147

@property

148

def bounds(self):

149

"""Get spatial bounds as (minx, miny, maxx, maxy)."""

150

151

@property

152

def meta(self):

153

"""Get metadata dictionary."""

154

155

@property

156

def profile(self):

157

"""Get profile dictionary (alias for meta)."""

158

159

@property

160

def closed(self):

161

"""Check if collection is closed."""

162

```

163

164

#### Usage Examples

165

166

```python

167

import fiona

168

from fiona.collection import Collection

169

170

# Reading with iteration

171

with Collection('data.shp', 'r') as collection:

172

print(f"Schema: {collection.schema}")

173

print(f"CRS: {collection.crs}")

174

print(f"Bounds: {collection.bounds}")

175

176

# Iterate over all features

177

for feature in collection:

178

print(f"Feature {feature['id']}: {feature['properties']}")

179

180

# Writing new collection

181

schema = {

182

'geometry': 'Point',

183

'properties': {

184

'name': 'str:50',

185

'population': 'int',

186

'elevation': 'float'

187

}

188

}

189

190

with Collection('cities.geojson', 'w', driver='GeoJSON',

191

schema=schema, crs='EPSG:4326') as collection:

192

193

cities = [

194

{

195

'geometry': {'type': 'Point', 'coordinates': [-122.4, 37.8]},

196

'properties': {'name': 'San Francisco', 'population': 875000, 'elevation': 52.0}

197

},

198

{

199

'geometry': {'type': 'Point', 'coordinates': [-74.0, 40.7]},

200

'properties': {'name': 'New York', 'population': 8400000, 'elevation': 10.0}

201

}

202

]

203

204

# Write multiple records efficiently

205

collection.writerecords(cities)

206

207

# Field filtering on read

208

with Collection('detailed_data.shp', 'r',

209

include_fields=['name', 'category']) as collection:

210

for feature in collection:

211

# Only specified fields are loaded, reducing memory usage

212

print(feature['properties']) # Only has 'name' and 'category'

213

```

214

215

### BytesCollection Class

216

217

A specialized collection backed by a bytes buffer, useful for working with data in memory or from network sources.

218

219

```python { .api }

220

class BytesCollection(Collection):

221

def __init__(self, bytesbuf, **kwds):

222

"""

223

Create collection from bytes buffer.

224

225

Parameters:

226

- bytesbuf: bytes, buffer containing dataset data

227

- kwds: dict, additional Collection parameters

228

"""

229

```

230

231

#### Usage Examples

232

233

```python

234

from fiona.collection import BytesCollection

235

import requests

236

237

# Read GeoJSON from HTTP response

238

response = requests.get('https://example.com/data.geojson')

239

geojson_bytes = response.content

240

241

with BytesCollection(geojson_bytes) as collection:

242

print(f"Driver: {collection.driver}")

243

print(f"Feature count: {len(collection)}")

244

for feature in collection:

245

print(f"Feature: {feature['id']}")

246

247

# Work with shapefile bytes (from zip archive)

248

import zipfile

249

import io

250

251

with zipfile.ZipFile('data.zip', 'r') as archive:

252

shp_bytes = archive.read('data.shp')

253

shx_bytes = archive.read('data.shx')

254

dbf_bytes = archive.read('data.dbf')

255

256

# Create virtual file system paths

257

vsi_path = '/vsimem/temp.shp'

258

with BytesCollection(shp_bytes, vsi=vsi_path) as collection:

259

# Process shapefile from memory

260

for feature in collection:

261

process_feature(feature)

262

263

# Convert between formats in memory

264

def convert_bytes_format(input_bytes, from_driver, to_driver):

265

"""Convert geospatial data between formats using bytes."""

266

output_buffer = io.BytesIO()

267

268

    # Read from input bytes; keep the source collection open while copying,
    # since iterating a closed collection would fail.
    with BytesCollection(input_bytes) as input_collection:
        schema = input_collection.schema
        crs = input_collection.crs

        # Write to output buffer
        with fiona.open(output_buffer, 'w', driver=to_driver,
                        schema=schema, crs=crs) as output:
            for feature in input_collection:
                output.write(feature)

    return output_buffer.getvalue()

280

281

# Example: Convert GeoJSON bytes to Shapefile bytes

282

geojson_bytes = b'{"type": "FeatureCollection", "features": [...]}'

283

shapefile_bytes = convert_bytes_format(geojson_bytes, 'GeoJSON', 'ESRI Shapefile')

284

```

285

286

## Performance Optimization

287

288

Collections provide several features for optimizing performance with large datasets:

289

290

```python

291

# Use field filtering to reduce memory usage

292

with fiona.open('large_dataset.shp', 'r',

293

include_fields=['id', 'name']) as collection:

294

# Only loads specified fields, faster and uses less memory

295

for feature in collection:

296

process_minimal_feature(feature)

297

298

# Skip geometry for attribute-only processing

299

with fiona.open('attribute_analysis.shp', 'r',

300

ignore_geometry=True) as collection:

301

# Much faster when you don't need spatial data

302

for feature in collection:

303

analyze_attributes(feature['properties'])

304

305

# Batch writing for better performance

306

features = generate_many_features() # Generator or large list

307

308

with fiona.open('output.geojson', 'w', **write_params) as collection:

309

# Write in batches rather than one-by-one

310

collection.writerecords(features)

311

```