Python library for the Socrata Open Data API
npx @tessl/cli install tessl/pypi-sodapy@2.2.0

A Python client library for the Socrata Open Data API (SODA). Sodapy enables programmatic access to open data hosted on Socrata platforms, providing comprehensive functionality for reading datasets with SoQL query support, paginating through large datasets, managing dataset metadata, and performing dataset creation and data upsert operations.
pip install sodapy

from sodapy import Socrata
import sodapy # For version access
from typing import Generator
from io import IOBase

Version information:
print(sodapy.__version__) # "2.2.0"

from sodapy import Socrata
# Initialize client with domain and optional app token
client = Socrata("opendata.socrata.com", "your_app_token")
# Basic data retrieval
results = client.get("dataset_id")
# Query with SoQL filtering
results = client.get("dataset_id", where="column > 100", limit=500)
# Get all data with automatic pagination
for record in client.get_all("dataset_id"):
print(record)
# Always close the client when done
client.close()
# Or use as context manager
with Socrata("opendata.socrata.com", "your_app_token") as client:
results = client.get("dataset_id", where="age > 21")

Sodapy is built around a single Socrata class that manages HTTP sessions and provides methods for all SODA API operations. The client handles authentication (basic HTTP auth, OAuth 2.0, or app tokens), automatic rate limiting, and provides both synchronous data access and generator-based pagination for large datasets.
Create and configure a Socrata client for API access.
class Socrata:
def __init__(
self,
domain: str,
app_token: str | None,
username: str | None = None,
password: str | None = None,
access_token: str | None = None,
session_adapter: dict | None = None,
timeout: int | float = 10
):
"""
Initialize Socrata client.
Args:
domain: Socrata domain (e.g., "opendata.socrata.com")
app_token: Socrata application token (required positional argument; pass None to omit it, though supplying a token is recommended)
username: Username for basic HTTP auth (for write operations)
password: Password for basic HTTP auth (for write operations)
access_token: OAuth 2.0 access token
session_adapter: Custom session adapter configuration
timeout: Request timeout in seconds
"""

Use Socrata client as a context manager for automatic cleanup.
def __enter__(self) -> 'Socrata':
"""Enter context manager."""
def __exit__(self, exc_type, exc_value, traceback) -> None:
"""Exit context manager and close session."""

List and search for datasets on a Socrata domain.
def datasets(
self,
limit: int = 0,
offset: int = 0,
order: str = None,
**kwargs
) -> list:
"""
Returns list of datasets associated with a domain.
Args:
limit: Maximum number of results (0 = all)
offset: Offset for pagination
order: Field to sort on, optionally with ' ASC' or ' DESC'
ids: List of dataset IDs to filter
domains: List of additional domains to search
categories: List of category filters
tags: List of tag filters
only: List of logical types ('dataset', 'chart', etc.)
shared_to: User/team IDs or 'site' for public datasets
column_names: Required column names in tabular datasets
q: Full text search query
min_should_match: Elasticsearch match requirement
attribution: Organization filter
license: License filter
derived_from: Parent dataset ID filter
provenance: 'official' or 'community'
for_user: Owner user ID filter
visibility: 'open' or 'internal'
public: Boolean for public/private filter
published: Boolean for published status filter
approval_status: 'pending', 'rejected', 'approved', 'not_ready'
explicitly_hidden: Boolean for hidden status filter
derived: Boolean for derived dataset filter
Returns:
List of dataset metadata dictionaries
"""

Retrieve data from Socrata datasets with query capabilities.
def get(
self,
dataset_identifier: str,
content_type: str = "json",
**kwargs
) -> list | dict | str:
"""
Read data from dataset with SoQL query support.
Args:
dataset_identifier: Dataset ID or identifier
content_type: Response format ('json', 'csv', 'xml')
select: Columns to return (defaults to all)
where: Row filter conditions
order: Sort specification
group: Column to group results on
limit: Maximum results to return (default 1000)
offset: Pagination offset (default 0)
q: Full text search value
query: Complete SoQL query string
exclude_system_fields: Exclude system fields (default True)
Returns:
List/dict of records for JSON, or string for CSV/XML
"""
def get_all(self, *args, **kwargs) -> Generator:
"""
Generator that retrieves all data with automatic pagination.
Accepts same arguments as get().
Yields:
Individual records from the dataset
"""

Retrieve and update dataset metadata.
def get_metadata(
self,
dataset_identifier: str,
content_type: str = "json"
) -> dict:
"""
Retrieve dataset metadata.
Args:
dataset_identifier: Dataset ID
content_type: Response format
Returns:
Dataset metadata dictionary
"""
def update_metadata(
self,
dataset_identifier: str,
update_fields: dict,
content_type: str = "json"
) -> dict:
"""
Update dataset metadata.
Args:
dataset_identifier: Dataset ID
update_fields: Dictionary of fields to update
content_type: Response format
Returns:
Updated metadata
"""

Insert, update, or replace data in datasets.
def upsert(
self,
dataset_identifier: str,
payload: list | dict | IOBase,
content_type: str = "json"
) -> dict:
"""
Insert, update, or delete data in existing dataset.
Args:
dataset_identifier: Dataset ID
payload: List of records, dictionary, or file object
content_type: Data format ('json', 'csv')
Returns:
Operation result with statistics
"""
def replace(
self,
dataset_identifier: str,
payload: list | dict | IOBase,
content_type: str = "json"
) -> dict:
"""
Replace all data in dataset with payload.
Args:
dataset_identifier: Dataset ID
payload: List of records, dictionary, or file object
content_type: Data format ('json', 'csv')
Returns:
Operation result with statistics
"""
def delete(
self,
dataset_identifier: str,
row_id: str | None = None,
content_type: str = "json"
) -> dict:
"""
Delete records or entire dataset.
Args:
dataset_identifier: Dataset ID
row_id: Specific row ID to delete (None deletes all)
content_type: Response format
Returns:
Operation result
"""

Create and manage datasets.
def create(self, name: str, **kwargs) -> dict:
"""
Create new dataset with field types.
Args:
name: Dataset name
description: Dataset description
columns: List of column definitions
category: Dataset category (must exist in domain)
tags: List of tag strings
row_identifier: Primary key field name
new_backend: Use new backend (default False)
Returns:
Created dataset metadata
"""
def publish(
self,
dataset_identifier: str,
content_type: str = "json"
) -> dict:
"""
Publish a dataset.
Args:
dataset_identifier: Dataset ID
content_type: Response format
Returns:
Publication result
"""
def set_permission(
self,
dataset_identifier: str,
permission: str = "private",
content_type: str = "json"
) -> dict:
"""
Set dataset permissions.
Args:
dataset_identifier: Dataset ID
permission: 'private' or 'public'
content_type: Response format
Returns:
Permission update result
"""

Manage file attachments on datasets.
def download_attachments(
self,
dataset_identifier: str,
content_type: str = "json",
download_dir: str = "~/sodapy_downloads"
) -> list:
"""
Download all attachments for a dataset.
Args:
dataset_identifier: Dataset ID
content_type: Response format
download_dir: Local directory for downloads (default: ~/sodapy_downloads)
Returns:
List of downloaded file paths
"""
def create_non_data_file(
self,
params: dict,
files: dict
) -> dict:
"""
Create non-data file attachment.
Args:
params: File parameters and metadata
files: Dictionary containing file tuple
Returns:
Created file metadata
"""
def replace_non_data_file(
self,
dataset_identifier: str,
params: dict,
files: dict
) -> dict:
"""
Replace existing non-data file attachment.
Args:
dataset_identifier: Dataset ID
params: File parameters and metadata
files: Dictionary containing file tuple
Returns:
Updated file metadata
"""

Manage HTTP session lifecycle.
def close(self) -> None:
"""Close the HTTP session."""

class Socrata:
DEFAULT_LIMIT = 1000 # Default pagination limit

Sodapy raises standard HTTP exceptions for API errors. The library includes enhanced error handling that extracts additional error information from Socrata API responses when available.
Common exceptions:
- requests.exceptions.HTTPError: HTTP 4xx/5xx responses with detailed error messages
- TypeError: Invalid parameter types (e.g. non-numeric timeout)
- Exception: Missing required parameters (e.g. domain not provided)

Sodapy supports the full Socrata Query Language (SoQL) for filtering and aggregating data:
Example SoQL usage:
# Filter and sort results
results = client.get("dataset_id",
where="age > 21 AND city = 'Boston'",
select="name, age, city",
order="age DESC",
limit=100)
# Aggregation with grouping
results = client.get("dataset_id",
select="city, COUNT(*) as total",
group="city",
order="total DESC")