CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-erddapy

Python interface for ERDDAP data servers that simplifies accessing scientific datasets

Pending
Overview
Eval results
Files

docs/data-conversion.md

Data Format Conversion

Convert ERDDAP data URLs and responses into various Python data analysis formats including pandas DataFrames, xarray Datasets, netCDF4 objects, and iris CubeLists. These functions provide the bridge between ERDDAP's web-based data services and Python's scientific computing ecosystem.

Capabilities

Pandas DataFrame Conversion

Convert ERDDAP CSV responses into pandas DataFrames for tabular data analysis.

def to_pandas(
    url: str,
    requests_kwargs: dict | None = None,
    pandas_kwargs: dict | None = None
) -> pd.DataFrame:
    """
    Convert ERDDAP URL to pandas DataFrame.
    
    Fetches data from the URL and parses it as CSV using pandas.read_csv.
    Typically used with tabledap URLs that return tabular data.
    
    Parameters:
    - url: ERDDAP data URL (usually with .csv or .csvp response)
    - requests_kwargs: Arguments passed to HTTP request (auth, timeout, etc.)
    - pandas_kwargs: Arguments passed to pandas.read_csv (parse_dates, dtype, etc.)
    
    Returns:
    - pandas.DataFrame with the downloaded data
    
    Raises:
    - ValueError: If URL cannot be read or parsed as CSV
    """

Usage Examples:

from erddapy.core.interfaces import to_pandas
from erddapy import ERDDAP

# Direct URL conversion
url = "https://gliders.ioos.us/erddap/tabledap/ru29-20150623T1046.csv?time,latitude,longitude,temperature&time>=2015-06-23T10:46:00Z&time<=2015-06-24T10:46:00Z"
df = to_pandas(url)
print(df.head())

# With custom pandas options
df = to_pandas(
    url,
    pandas_kwargs={
        'parse_dates': ['time'],
        'dtype': {'temperature': 'float32'}
    }
)

# Via ERDDAP instance (recommended)
e = ERDDAP(server="NGDAC", protocol="tabledap")
e.dataset_id = "ru29-20150623T1046"
e.constraints = {
    'time>=': '2015-06-23T10:46:00Z',
    'time<=': '2015-06-24T10:46:00Z'
}
df = e.to_pandas()  # Uses to_pandas internally

xarray Dataset Conversion

Convert ERDDAP responses into xarray Datasets for N-dimensional labeled array analysis.

def to_xarray(
    url: str,
    response: str = "opendap",
    requests_kwargs: dict | None = None,
    xarray_kwargs: dict | None = None
) -> xr.Dataset:
    """
    Convert ERDDAP URL to xarray Dataset.
    
    Handles different response formats (NetCDF, OPeNDAP) and opens them
    with xarray. Particularly useful for gridded data from griddap servers.
    
    Parameters:
    - url: ERDDAP data URL
    - response: Response type ('nc', 'opendap', 'ncCF') 
    - requests_kwargs: HTTP request arguments including auth
    - xarray_kwargs: Arguments passed to xarray.open_dataset
    
    Returns:
    - xarray.Dataset with labeled dimensions and coordinates
    """

Usage Examples:

from erddapy.core.interfaces import to_xarray
from erddapy import ERDDAP

# Direct conversion from NetCDF URL
nc_url = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41.nc?analysed_sst[(2020-01-01T09:00:00Z)][(89.99):(-89.99)][(179.99):(-179.99)]"
ds = to_xarray(
    nc_url, 
    response="nc",
    requests_kwargs={},
    xarray_kwargs={'decode_times': True}
)
print(ds)

# Via ERDDAP instance for griddap data
e = ERDDAP(server="CSWC", protocol="griddap")
e.dataset_id = "jplMURSST41"
e.constraints = {
    'time': '2020-01-01T09:00:00Z',
    'latitude': slice(40, 50),
    'longitude': slice(-130, -120)
}
ds = e.to_xarray()  # Automatically selects appropriate response format
print(f"Dataset dimensions: {list(ds.dims)}")
print(f"Data variables: {list(ds.data_vars)}")

netCDF4 Dataset Conversion

Convert ERDDAP responses into netCDF4 Dataset objects for low-level NetCDF file access.

def to_ncCF(
    url: str,
    protocol: str | None = None,
    requests_kwargs: dict | None = None
) -> netCDF4.Dataset:
    """
    Convert ERDDAP URL to CF-compliant netCDF4 Dataset.
    
    Downloads data and opens it as a netCDF4 Dataset object,
    providing direct access to NetCDF attributes and methods.
    
    Parameters:
    - url: ERDDAP data URL (typically .ncCF response)
    - protocol: 'tabledap' or 'griddap' (affects processing)
    - requests_kwargs: HTTP request arguments
    
    Returns:
    - netCDF4.Dataset object
    """

Usage Examples:

from erddapy.core.interfaces import to_ncCF
import netCDF4

# Convert URL to netCDF4 Dataset
url = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41.ncCF?analysed_sst[(2020-01-01T09:00:00Z)][(40):(50)][(-130):(-120)]"
nc_ds = to_ncCF(url, protocol="griddap")

# Access netCDF4 methods and attributes
print("Global attributes:")
for attr in nc_ds.ncattrs():
    print(f"  {attr}: {getattr(nc_ds, attr)}")

print("\nVariables:")
for var_name, var in nc_ds.variables.items():
    print(f"  {var_name}: {var.shape} {var.dtype}")

# Access data
sst = nc_ds.variables['analysed_sst'][:]
print(f"SST data shape: {sst.shape}")

# Close when done
nc_ds.close()

# Via ERDDAP instance
e = ERDDAP(server="CSWC", protocol="griddap")
e.dataset_id = "jplMURSST41"
nc_ds = e.to_ncCF()

iris CubeList Conversion

Convert ERDDAP responses into iris CubeLists for Earth science data analysis with CF conventions.

def to_iris(
    url: str,
    iris_kwargs: dict = None
) -> iris.cube.CubeList:
    """
    Convert ERDDAP URL to iris CubeList.
    
    Downloads NetCDF data and loads it with iris, providing
    Earth science-specific data structures and analysis tools.
    
    Parameters:
    - url: ERDDAP data URL (NetCDF format)
    - iris_kwargs: Arguments passed to iris.load_raw
    
    Returns:
    - iris.cube.CubeList containing loaded cubes
    """

Usage Examples:

from erddapy.core.interfaces import to_iris
import iris

# Convert URL to iris CubeList
url = "https://coastwatch.pfeg.noaa.gov/erddap/griddap/jplMURSST41.nc?analysed_sst[(2020-01-01T09:00:00Z)][(40):(50)][(-130):(-120)]"
cubes = to_iris(url)

# Work with iris cubes
for cube in cubes:
    print(f"Cube: {cube.name()}")
    print(f"Shape: {cube.shape}")
    print(f"Units: {cube.units}")
    print(f"Coordinates: {[coord.name() for coord in cube.coords()]}")

# Access first cube
if cubes:
    sst_cube = cubes[0]
    
    # iris provides rich metadata access
    print(f"Standard name: {sst_cube.standard_name}")
    print(f"Long name: {sst_cube.long_name}")
    
    # Access coordinate information
    for coord in sst_cube.coords():
        print(f"Coordinate {coord.name()}: {coord.units}")

# Via ERDDAP instance
e = ERDDAP(server="CSWC", protocol="griddap")
e.dataset_id = "jplMURSST41"
cubes = e.to_iris()

Format Selection Guidelines

Choose the appropriate data format based on your analysis needs:

pandas DataFrame

  • Best for: Tabular data, time series, station data
  • Use when: Working with tabledap data, CSV-like datasets
  • Advantages: Familiar API, excellent for data manipulation, filtering, grouping
  • Example datasets: Glider tracks, buoy time series, cruise data

xarray Dataset

  • Best for: Multi-dimensional gridded data, labeled arrays
  • Use when: Working with griddap data, satellite imagery, model output
  • Advantages: Labeled dimensions, broadcasting, CF conventions support
  • Example datasets: Satellite SST, ocean models, atmospheric reanalysis

netCDF4 Dataset

  • Best for: Low-level NetCDF access, custom attribute handling
  • Use when: Need direct NetCDF file manipulation, specific format requirements
  • Advantages: Complete NetCDF API access, metadata control
  • Example datasets: Any NetCDF data requiring specialized processing

iris CubeList

  • Best for: Earth science analysis with CF conventions
  • Use when: Working with meteorological/oceanographic data, need CF-aware processing
  • Advantages: CF conventions, coordinate systems, Earth science specific tools
  • Example datasets: Weather models, climate data, ocean reanalysis

HTTP Request Configuration

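Conversion functions that accept a requests_kwargs argument pass those options through to the underlying HTTP request, so authentication, timeouts, headers, and SSL settings can be customized: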

from erddapy.core.interfaces import to_pandas

# Authentication
requests_config = {
    'auth': ('username', 'password'),
    'timeout': 60,
    'headers': {'User-Agent': 'MyApp/1.0'}
}

df = to_pandas(url, requests_kwargs=requests_config)

# SSL configuration
requests_config = {
    'verify': False,  # Skip SSL verification (not recommended)
    'cert': ('client.cert', 'client.key')  # Client certificates
}

Supported Download Formats

ERDDAP supports numerous output formats for data download. The complete list of available formats is:

download_formats = [
    "asc", "csv", "csvp", "csv0", "dataTable", "das", "dds", "dods", 
    "esriCsv", "fgdc", "geoJson", "graph", "help", "html", "iso19115", 
    "itx", "json", "jsonlCSV1", "jsonlCSV", "jsonlKVP", "mat", "nc", 
    "ncHeader", "ncCF", "ncCFHeader", "ncCFMA", "ncCFMAHeader", "nccsv", 
    "nccsvMetadata", "ncoJson", "odvTxt", "subset", "tsv", "tsvp", "tsv0", 
    "wav", "xhtml", "kml", "smallPdf", "pdf", "largePdf", "smallPng", 
    "png", "largePng", "transparentPng"
]

These formats can be used with the response parameter in URL building functions and the file_type parameter in the download_file method.

Error Handling

The conversion functions provide informative error messages:

from erddapy.core.interfaces import to_pandas

try:
    df = to_pandas("https://invalid-url.com/data.csv")
except ValueError as e:
    print(f"Conversion failed: {e}")

# Handle different error types
try:
    df = to_pandas(
        "https://coastwatch.pfeg.noaa.gov/erddap/tabledap/nonexistent.csv",
        requests_kwargs={'timeout': 10}
    )
except Exception as e:
    print(f"Request failed: {type(e).__name__}: {e}")

Install with Tessl CLI

npx tessl i tessl/pypi-erddapy

docs

data-conversion.md

erddap-client.md

index.md

multi-server-search.md

server-management.md

tile.json