0
# Core Data Structures
1
2
GeoPandas provides two main data structures that extend pandas functionality with geospatial capabilities: GeoDataFrame and GeoSeries. These classes maintain all pandas functionality while adding geometry column support, spatial operations, and coordinate reference system management.
3
4
## Capabilities
5
6
### GeoDataFrame Class
7
8
GeoDataFrame extends pandas DataFrame to support geometry columns and spatial operations. It maintains a reference to the active geometry column and provides methods for spatial analysis, coordinate transformations, and geospatial file I/O.
9
10
```python { .api }
11
class GeoDataFrame(DataFrame):
12
def __init__(self, data=None, index=None, columns=None, dtype=None, copy=None, geometry=None, crs=None):
13
"""
14
Initialize GeoDataFrame with optional geometry column and CRS.
15
16
Parameters:
17
- data: DataFrame-like data
18
- index: Index to use for resulting frame
19
- columns: Column labels to use for resulting frame
20
- dtype: Data type to force
21
- copy: Copy data from inputs
22
- geometry: Column name or array-like of geometry objects
23
- crs: Coordinate Reference System
24
"""
25
...
26
27
@property
28
def geometry(self) -> GeoSeries:
29
"""Active geometry column as GeoSeries."""
30
...
31
32
@geometry.setter
33
def geometry(self, col):
34
"""Set the active geometry column."""
35
...
36
37
@property
38
def active_geometry_name(self) -> str:
39
"""Name of the active geometry column."""
40
...
41
42
@property
43
def crs(self):
44
"""Coordinate Reference System of the geometry column."""
45
...
46
47
@crs.setter
48
def crs(self, value):
49
"""Set the Coordinate Reference System."""
50
...
51
52
@property
53
def total_bounds(self) -> tuple:
54
"""Return bounding box of all geometries as (minx, miny, maxx, maxy)."""
55
...
56
57
@property
58
def sindex(self):
59
"""Spatial index for efficient spatial queries."""
60
...
61
62
def set_geometry(self, col, drop=False, inplace=False, crs=None):
63
"""
64
Set the active geometry column.
65
66
Parameters:
67
- col: Column name or array-like geometry data
68
- drop: Delete the column used for geometry
69
- inplace: If True, modify the GeoDataFrame in place and return None; if False (default), return a new GeoDataFrame
70
- crs: Coordinate reference system to use
71
72
Returns:
73
- GeoDataFrame or None if inplace=True
74
"""
75
...
76
77
def rename_geometry(self, col, inplace=False):
78
"""
79
Rename the active geometry column.
80
81
Parameters:
82
- col: New name for geometry column
83
- inplace: If True, modify the GeoDataFrame in place and return None; if False (default), return a new GeoDataFrame
84
85
Returns:
86
- GeoDataFrame or None if inplace=True
87
"""
88
...
89
90
def set_crs(self, crs, allow_override=False, inplace=False):
91
"""
92
Set the Coordinate Reference System.
93
94
Parameters:
95
- crs: CRS to set (string, dict, or pyproj.CRS)
96
- allow_override: Allow overriding existing CRS
97
- inplace: If True, modify the GeoDataFrame in place and return None; if False (default), return a new GeoDataFrame
98
99
Returns:
100
- GeoDataFrame or None if inplace=True
101
"""
102
...
103
104
def to_crs(self, crs=None, epsg=None, inplace=False):
105
"""
106
Transform geometries to a new coordinate reference system.
107
108
Parameters:
109
- crs: Target CRS (string, dict, or pyproj.CRS)
110
- epsg: EPSG code for target CRS
111
- inplace: If True, modify the GeoDataFrame in place and return None; if False (default), return a new GeoDataFrame
112
113
Returns:
114
- GeoDataFrame or None if inplace=True
115
"""
116
...
117
118
def estimate_utm_crs(self, datum_name='WGS 84'):
119
"""
120
Estimate the most appropriate UTM CRS for the geometries.
121
122
Parameters:
123
- datum_name: Name of the datum to use
124
125
Returns:
126
- pyproj.CRS: Estimated UTM CRS
127
"""
128
...
129
130
@classmethod
131
def from_arrow(cls, table, geometry=None, **kwargs):
132
"""
133
Create GeoDataFrame from Apache Arrow Table.
134
135
Parameters:
136
- table: Arrow Table containing the data
137
- geometry: Column name to use as geometry column
138
- **kwargs: Additional arguments
139
140
Returns:
141
- GeoDataFrame: New GeoDataFrame instance
142
"""
143
...
144
145
def to_geo_dict(self, **kwargs):
146
"""
147
Export as GeoJSON-like dictionary.
148
149
Parameters:
150
- **kwargs: Additional arguments for GeoJSON serialization
151
152
Returns:
153
- dict: GeoJSON-like dictionary representation
154
"""
155
...
156
157
def iterfeatures(self, na='null', precision=None, drop_id=False, to_wgs84=False, **kwargs):
158
"""
159
Iterate over rows as GeoJSON-like feature dictionaries.
160
161
Parameters:
162
- na: How to handle null values
163
- precision: Coordinate precision for output
164
- drop_id: Whether to drop the id field
165
- to_wgs84: Transform to WGS84 before output
166
- **kwargs: Additional arguments
167
168
Yields:
169
- dict: GeoJSON-like feature dictionaries
170
"""
171
...
172
173
def dissolve(self, by=None, aggfunc='first', as_index=True, level=None, sort=True, observed=False, dropna=True):
174
"""
175
Dissolve geometries based on grouping variables.
176
177
Parameters:
178
- by: Column name(s) to group by
179
- aggfunc: Aggregation function for non-geometry columns
180
- as_index: Return group labels as the index
181
- level: Level(s) to group by if index is MultiIndex
182
- sort: Sort the result by group keys
183
- observed: Use observed values for categorical groupers
184
- dropna: Drop groups with null values
185
186
Returns:
187
- GeoDataFrame: Dissolved geometries
188
"""
189
...
190
191
def explode(self, column=None, ignore_index=False, index_parts=False):
192
"""
193
Explode multi-part geometries into separate rows.
194
195
Parameters:
196
- column: Column to explode (defaults to geometry column)
197
- ignore_index: Reset index in the result
198
- index_parts: Include part index in result
199
200
Returns:
201
- GeoDataFrame: Exploded geometries
202
"""
203
...
204
205
def cx(self):
206
"""Coordinate-based indexer, used as gdf.cx[xmin:xmax, ymin:ymax], that selects geometries intersecting the given bounding box."""
207
...
208
```
209
210
### GeoSeries Class
211
212
GeoSeries extends pandas Series to hold geometry objects with spatial methods and properties. It provides the foundation for geometric operations and spatial analysis in GeoPandas.
213
214
```python { .api }
215
class GeoSeries(Series):
216
def __init__(self, data=None, index=None, crs=None, **kwargs):
217
"""
218
Initialize GeoSeries with geometry data and CRS.
219
220
Parameters:
221
- data: Array-like geometry data
222
- index: Index to use for resulting series
223
- crs: Coordinate Reference System
224
- **kwargs: Additional Series parameters
225
"""
226
...
227
228
@property
229
def crs(self):
230
"""Coordinate Reference System of the geometries."""
231
...
232
233
@crs.setter
234
def crs(self, value):
235
"""Set the Coordinate Reference System."""
236
...
237
238
@property
239
def geometry(self) -> 'GeoSeries':
240
"""Return self (for consistency with GeoDataFrame)."""
241
...
242
243
@property
244
def x(self) -> Series:
245
"""X coordinates of Point geometries."""
246
...
247
248
@property
249
def y(self) -> Series:
250
"""Y coordinates of Point geometries."""
251
...
252
253
@property
254
def z(self) -> Series:
255
"""Z coordinates of Point geometries."""
256
...
257
258
@property
259
def m(self) -> Series:
260
"""M coordinates of Point geometries."""
261
...
262
263
@property
264
def total_bounds(self) -> tuple:
265
"""Return bounding box of all geometries as (minx, miny, maxx, maxy)."""
266
...
267
268
@property
269
def sindex(self):
270
"""Spatial index for efficient spatial queries."""
271
...
272
273
def set_crs(self, crs, allow_override=False, inplace=False):
274
"""
275
Set the Coordinate Reference System.
276
277
Parameters:
278
- crs: CRS to set (string, dict, or pyproj.CRS)
279
- allow_override: Allow overriding existing CRS
280
- inplace: If True, modify the GeoSeries in place and return None; if False (default), return a new GeoSeries
281
282
Returns:
283
- GeoSeries or None if inplace=True
284
"""
285
...
286
287
def to_crs(self, crs=None, epsg=None, inplace=False):
288
"""
289
Transform geometries to a new coordinate reference system.
290
291
Parameters:
292
- crs: Target CRS (string, dict, or pyproj.CRS)
293
- epsg: EPSG code for target CRS
294
- inplace: If True, modify the GeoSeries in place and return None; if False (default), return a new GeoSeries
295
296
Returns:
297
- GeoSeries or None if inplace=True
298
"""
299
...
300
301
def estimate_utm_crs(self, datum_name='WGS 84'):
302
"""
303
Estimate the most appropriate UTM CRS for the geometries.
304
305
Parameters:
306
- datum_name: Name of the datum to use
307
308
Returns:
309
- pyproj.CRS: Estimated UTM CRS
310
"""
311
...
312
313
@classmethod
314
def from_arrow(cls, arr, **kwargs):
315
"""
316
Create GeoSeries from Apache Arrow array.
317
318
Parameters:
319
- arr: Arrow array containing geometry data
320
- **kwargs: Additional arguments
321
322
Returns:
323
- GeoSeries: New GeoSeries instance
324
"""
325
...
326
327
@classmethod
328
def from_wkb(cls, data, index=None, crs=None, **kwargs):
329
"""
330
Create GeoSeries from Well-Known Binary (WKB) data.
331
332
Parameters:
333
- data: Array-like of WKB bytes
334
- index: Index to use for resulting series
335
- crs: Coordinate Reference System
336
- **kwargs: Additional arguments
337
338
Returns:
339
- GeoSeries: New GeoSeries instance
340
"""
341
...
342
343
@classmethod
344
def from_wkt(cls, data, index=None, crs=None, **kwargs):
345
"""
346
Create GeoSeries from Well-Known Text (WKT) data.
347
348
Parameters:
349
- data: Array-like of WKT strings
350
- index: Index to use for resulting series
351
- crs: Coordinate Reference System
352
- **kwargs: Additional arguments
353
354
Returns:
355
- GeoSeries: New GeoSeries instance
356
"""
357
...
358
359
@classmethod
360
def from_xy(cls, x, y, z=None, index=None, crs=None, **kwargs):
361
"""
362
Create GeoSeries of Point geometries from coordinate arrays.
363
364
Parameters:
365
- x: Array-like of x coordinates
366
- y: Array-like of y coordinates
367
- z: Array-like of z coordinates (optional)
368
- index: Index to use for resulting series
369
- crs: Coordinate Reference System
370
- **kwargs: Additional arguments
371
372
Returns:
373
- GeoSeries: New GeoSeries of Point geometries
374
"""
375
...
376
```
377
378
### Constructor Functions
379
380
Functions for creating geometry arrays and series from various input formats.
381
382
```python { .api }
383
def points_from_xy(x, y, z=None, crs=None):
384
"""
385
Create Point geometries from x, y coordinates.
386
387
Parameters:
388
- x: Array-like of x coordinates
389
- y: Array-like of y coordinates
390
- z: Array-like of z coordinates (optional)
391
- crs: Coordinate Reference System
392
393
Returns:
394
- GeometryArray: Array of Point geometries (usable as the geometry argument of GeoDataFrame)
395
"""
396
...
397
```
398
399
### Testing Utilities
400
401
Functions for testing equality of geospatial objects.
402
403
```python { .api }
404
def assert_geoseries_equal(left, right, check_dtype=True, check_index_type=True, check_series_type=True, check_names=True, check_crs=True, check_geom_type=False, check_less_precise=False, normalize=False):
405
"""
406
Assert that two GeoSeries are equal.
407
408
Parameters:
409
- left: GeoSeries to compare
410
- right: GeoSeries to compare
411
- check_dtype: Whether to check dtype equivalence
412
- check_index_type: Whether to check index type equivalence
413
- check_series_type: Whether to check series type equivalence
414
- check_names: Whether to check names equivalence
415
- check_crs: Whether to check CRS equivalence
416
- check_geom_type: Whether to check geometry type
417
- check_less_precise: Whether to use less precise comparison
418
- normalize: Whether to normalize geometries before comparison
419
"""
420
...
421
422
def assert_geodataframe_equal(left, right, check_dtype=True, check_index_type=True, check_frame_type=True, check_names=True, check_crs=True, check_geom_type=False, check_less_precise=False, normalize=False):
423
"""
424
Assert that two GeoDataFrames are equal.
425
426
Parameters:
427
- left: GeoDataFrame to compare
428
- right: GeoDataFrame to compare
429
- check_dtype: Whether to check dtype equivalence
430
- check_index_type: Whether to check index type equivalence
431
- check_frame_type: Whether to check frame type equivalence
432
- check_names: Whether to check names equivalence
433
- check_crs: Whether to check CRS equivalence
434
- check_geom_type: Whether to check geometry type
435
- check_less_precise: Whether to use less precise comparison
436
- normalize: Whether to normalize geometries before comparison
437
"""
438
...
439
```
440
441
### Version Information
442
443
Function for displaying version information about GeoPandas and its dependencies.
444
445
```python { .api }
446
def show_versions():
447
"""
448
Print version information for GeoPandas and its dependencies.
449
450
This function prints version information for GeoPandas, Python, and all
451
the dependencies that are installed on the system.
452
"""
453
...
454
```
455
456
## Usage Examples
457
458
### Creating GeoDataFrames
459
460
```python
461
import geopandas as gpd
462
import pandas as pd
463
from shapely.geometry import Point, Polygon
464
465
# From an existing DataFrame, supplying the geometries separately
466
df = pd.DataFrame({
467
'City': ['New York', 'London', 'Tokyo'],
468
'Population': [8400000, 8900000, 13960000]
469
})
470
geometry = [Point(-74.0, 40.7), Point(-0.1, 51.5), Point(139.7, 35.7)]
471
gdf = gpd.GeoDataFrame(df, geometry=geometry, crs='EPSG:4326')
472
473
# From scratch with geometry data
474
gdf = gpd.GeoDataFrame({
475
'id': [1, 2, 3],
476
'name': ['Area A', 'Area B', 'Area C'],
477
'geometry': [
478
Polygon([(0, 0), (1, 0), (1, 1), (0, 1)]),
479
Polygon([(1, 0), (2, 0), (2, 1), (1, 1)]),
480
Polygon([(0, 1), (1, 1), (1, 2), (0, 2)])
481
]
482
}, crs='EPSG:4326')
483
484
# Using points_from_xy
485
gdf = gpd.GeoDataFrame({
486
'location': ['A', 'B', 'C']
487
}, geometry=gpd.points_from_xy([-1, 0, 1], [1, 0, -1], crs='EPSG:4326'))
488
```
489
490
### Working with Geometry Columns
491
492
```python
493
# Access geometry properties
494
print(gdf.geometry.area)
495
print(gdf.geometry.centroid)
496
print(gdf.total_bounds)
497
498
# Set and rename geometry columns
499
gdf = gdf.set_geometry('geometry')
500
gdf = gdf.rename_geometry('geom')
501
502
# Work with coordinate reference systems
503
print(gdf.crs)
504
gdf = gdf.set_crs('EPSG:4326')
505
gdf_utm = gdf.to_crs(gdf.estimate_utm_crs())
506
```
507
508
### GeoSeries Operations
509
510
```python
511
# Create GeoSeries
512
from shapely.geometry import Point, LineString
513
gs = gpd.GeoSeries([
514
Point(0, 0),
515
LineString([(0, 0), (1, 1)]),
516
Point(1, 1)
517
], crs='EPSG:4326')
518
519
# Access coordinate properties for Points
520
points = gs[gs.geom_type == 'Point']
521
print(points.x) # X coordinates
522
print(points.y) # Y coordinates
523
524
# Spatial indexing
525
print(gs.sindex) # Spatial index for queries
526
```