CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-obspy

ObsPy is a Python toolbox for seismology providing parsers for seismological data formats, clients for data centers, and signal processing routines for seismological time series analysis.

Pending
Overview
Eval results
Files

docs/file-format-io.md

File Format I/O

Support for 67+ seismological file formats with automatic format detection, unified read/write interfaces, and format-specific optimizations for waveforms, events, and station metadata. ObsPy's I/O system provides seamless interoperability across the global seismological data ecosystem.

Capabilities

Universal Read/Write Functions

Format-agnostic functions with automatic format detection that handle the complexity of seismological data formats transparently.

def read(pathname_or_url, format=None, headonly=False, starttime=None,
         endtime=None, nearest_sample=True, dtype=None, apply_calib=False,
         check_compression=True, **kwargs) -> "Stream":
    """
    Read waveform files into Stream object with automatic format detection.

    Note: this is an API summary stub; format-specific plugins accept
    additional keyword arguments via **kwargs.

    Args:
        pathname_or_url: File path, URL, file-like object, or glob pattern
        format: Format hint (auto-detected if None)
        headonly: Read metadata only, skip waveform data
        starttime: Start time for reading window (UTCDateTime)
        endtime: End time for reading window (UTCDateTime)
        nearest_sample: Align times to nearest available sample
        dtype: Convert data to specified NumPy dtype
        apply_calib: Apply calibration factor from metadata
        check_compression: Verify compressed data integrity
        **kwargs: Format-specific reading options

    Returns:
        Stream object containing traces with waveform data and metadata

    Supported Formats:
        MiniSEED (.mseed, .seed), SAC (.sac), GSE2 (.gse), SEG-Y (.segy, .sgy),
        WIN (.win), CSS (.wfdisc), SEISAN (.seisan), AH (.ah), WAV (.wav),
        GCF (.gcf), RefTek (.rt130), PDAS (.pd), Y (.y), SEG-2 (.sg2),
        SH (.qhd, .qbn), Kinemetrics (.evt), NIED (.knet), RG16 (.rg16),
        DMX (.dmx), ALSEP (.pse, .wtn, .wth), ASCII formats, and others
    """

def read_events(pathname_or_url, format=None, **kwargs) -> "Catalog":
    """
    Read earthquake event files into Catalog object with automatic format detection.

    Args:
        pathname_or_url: File path, URL, file-like object, or glob pattern
        format: Format hint (auto-detected if None)
        **kwargs: Format-specific options

    Returns:
        Catalog object containing earthquake events

    Supported Formats:
        QuakeML (.xml), NDK (.ndk), CMTSOLUTION (.cmt), Nordic (.nordic),
        NonLinLoc (.hyp, .obs), SC3ML (.xml), ZMAP (.zmap), JSON (.json),
        MCHEDR (.txt), CNV (.cnv), FOCMEC (.foc), HypoDD (.pha),
        SCARDEC (.txt), GSE2 bulletin, IMS1.0 bulletin, and others
    """

def read_inventory(pathname_or_url, format=None, **kwargs) -> "Inventory":
    """
    Read station metadata files into Inventory object with automatic format detection.

    Args:
        pathname_or_url: File path, URL, file-like object, or glob pattern
        format: Format hint (auto-detected if None)
        **kwargs: Format-specific options

    Returns:
        Inventory object containing station/channel metadata and responses

    Supported Formats:
        StationXML (.xml), SEED/XSEED (.seed, .xml), Dataless SEED (.seed),
        RESP files (.resp), SACPZ (.pz), CSS station files (.site),
        Station text (.txt), SC3ML inventory, ArcLink XML, and others
    """

Write Methods

Integrated write functionality in core data objects for exporting data in multiple formats.

# Stream write methods
Stream.write(self, filename: str, format: str, **kwargs):
    """
    Write stream to file in specified format.
    
    Args:
        filename: Output filename (extension used for format detection)
        format: Output format (required for some formats)
        **kwargs: Format-specific writing options
        
    Supported Write Formats:
        MiniSEED, SAC, GSE2, SEG-Y, WAV, GCF, ASCII, PICKLE, and others
    """

# Catalog write methods  
Catalog.write(self, filename: str, format: str, **kwargs):
    """
    Write catalog to file in specified format.
    
    Args:
        filename: Output filename
        format: Output format (QuakeML, NDK, CMTSOLUTION, etc.)
        **kwargs: Format-specific options
        
    Supported Write Formats:
        QuakeML, NDK, CMTSOLUTION, ZMAP, JSON, CNV, NORDIC, 
        SHAPEFILE, KML, and others
    """

# Inventory write methods
Inventory.write(self, filename: str, format: str, **kwargs):
    """
    Write inventory to file in specified format.
    
    Args:
        filename: Output filename
        format: Output format
        **kwargs: Format-specific options
        
    Supported Write Formats:
        StationXML, SACPZ, CSS, STATIONTXT, SHAPEFILE, KML, and others
    """

Format-Specific Features

MiniSEED Format

Most comprehensive support with advanced features for the seismological standard format.

# Import from obspy.io.mseed
class InternalMSEEDError(Exception):
    """Error reported by the internal MiniSEED library."""


class InternalMSEEDWarning(UserWarning):
    """Warning reported by the internal MiniSEED library."""


class ObsPyMSEEDError(Exception):
    """Error specific to ObsPy's MiniSEED handling."""


class ObsPyMSEEDFilesizeTooSmallError(ObsPyMSEEDError):
    """Raised when a MiniSEED file is too small to hold valid data."""

# MiniSEED-specific functions
def get_record_information(filename: str, offset: int = 0):
    """
    Return detailed header information for a single MiniSEED record.

    Args:
        filename: Path to the MiniSEED file to inspect.
        offset: Byte offset of the target record within the file.

    Returns:
        Dictionary with record header information
    """

SAC Format

Seismic Analysis Code format with extensive header support.

# SAC format supports rich metadata through header variables
# Automatic conversion between ObsPy Stats and SAC header format
# Includes support for SAC XY format for non-time-series data

# Read with SAC-specific options (header debugging + checksum verification)
sac_stream = read('seismic.sac', debug_headers=True, checksum=True)

# Write back out, preserving the SAC format
sac_stream.write('output.sac', format='SAC')

File Format Categories

Waveform Formats (26 formats)

# Complete list of supported waveform formats
# Maps ObsPy format code -> human-readable description. The keys are the
# strings passed as the ``format`` argument of read()/Stream.write().
WAVEFORM_FORMATS = {
    'MSEED': 'MiniSEED format - seismological standard',
    'SAC': 'Seismic Analysis Code format',
    'GSE2': 'Group of Scientific Experts format',
    'SEGY': 'Society of Exploration Geophysicists Y format',
    'WIN': 'WIN format from NIED Japan',
    'CSS': 'Center for Seismic Studies waveform format',
    'SEISAN': 'SEISAN seismology software format',
    'AH': 'Ad Hoc format',
    'WAV': 'WAV audio format',
    'GCF': 'Guralp Compressed Format',
    'REFTEK130': 'RefTek RT-130 format',
    'PDAS': 'PDAS format',
    'Y': 'Nanometrics Y format',
    'SEG2': 'SEG-2 format',
    'SH_ASC': 'Seismic Handler ASCII format',
    'Q': 'Seismic Handler Q format',
    'KINEMETRICS_EVT': 'Kinemetrics EVT format',
    'KNET': 'NIED K-NET ASCII format',
    'RG16': 'Fairfield RG-16 format',
    'DMX': 'DMX format',
    # Apollo lunar seismic experiment formats
    'ALSEP_PSE': 'Apollo Lunar PSE format',
    'ALSEP_WTN': 'Apollo Lunar WTN format',
    'ALSEP_WTH': 'Apollo Lunar WTH format',
    # Plain-text and serialization formats
    'TSPAIR': 'Time-sample pair ASCII',
    'SLIST': 'Sample list ASCII',
    'PICKLE': 'Python pickle format'
}

Event Formats (18 formats)

# Maps event-catalog format code -> description. The keys are the strings
# passed as the ``format`` argument of read_events()/Catalog.write().
EVENT_FORMATS = {
    'QUAKEML': 'QuakeML - FDSN standard XML format',
    'NDK': 'Harvard CMT NDK format',
    'CMTSOLUTION': 'CMT solution format',
    'NORDIC': 'Nordic format from NORSAR',
    'NLLOC_HYP': 'NonLinLoc hypocenter format',
    'SC3ML': 'SeisComp3 ML format',
    'ZMAP': 'ZMAP format',
    'JSON': 'JSON event format',
    'MCHEDR': 'PDE MCHEDR format',
    'CNV': 'CNV format',
    'FOCMEC': 'FOCMEC focal mechanism format',
    'HYPODD_PHA': 'HypoDD phase format',
    'SCARDEC': 'SCARDEC format',
    # Export-only geographic formats
    'SHAPEFILE': 'ESRI Shapefile export',
    'KML': 'Google Earth KML format',
    'FNETMT': 'F-net moment tensor format',
    'GSE2': 'GSE2 bulletin format',
    'IMS10BULLETIN': 'IMS1.0 bulletin format'
}

Inventory Formats (9 formats)

# Maps station-metadata format code -> description. The keys are the strings
# passed as the ``format`` argument of read_inventory()/Inventory.write().
INVENTORY_FORMATS = {
    'STATIONXML': 'FDSN StationXML - metadata standard',
    'SEED': 'SEED format with full response',
    'XSEED': 'XML-SEED format',
    'RESP': 'RESP response file format',
    'SACPZ': 'SAC pole-zero format',
    'CSS': 'CSS station table format',
    'STATIONTXT': 'FDSN station text format',
    'SC3ML': 'SeisComp3 inventory format',
    'INVENTORYXML': 'ArcLink inventory XML'
}

Usage Examples

Basic File I/O Operations

# UTCDateTime is needed for the windowed read below; the original example
# used it without importing it.
from obspy import read, read_events, read_inventory, UTCDateTime

# Read various waveform formats (automatic detection)
st_mseed = read('data.mseed')
st_sac = read('data.sac')
st_segy = read('data.segy')
st_multiple = read('data*.mseed')  # Read multiple files

# Read only a time window of the data
st = read('data.mseed',
          starttime=UTCDateTime("2023-01-01T10:00:00"),
          endtime=UTCDateTime("2023-01-01T11:00:00"),
          headonly=False)

# Read event catalogs
catalog_quakeml = read_events('events.xml')
catalog_ndk = read_events('catalog.ndk')
catalog_json = read_events('events.json')

# Read station metadata
inventory_xml = read_inventory('stations.xml')
inventory_seed = read_inventory('dataless.seed')
inventory_resp = read_inventory('RESP.IU.ANMO.00.BHZ')

Format Conversion Workflows

from obspy import read, read_events, read_inventory

# --- Waveform conversion ---
waveforms = read('input.sac')
waveforms.write('output.mseed', format='MSEED')
# SEG-Y needs explicit encoding and byte-order choices:
waveforms.write('output.segy', format='SEGY',
                data_encoding=1,  # 4-byte IBM floating point
                byteorder='>')    # Big-endian

# --- Event-catalog conversion ---
events_catalog = read_events('events.xml', format='QUAKEML')
for target, fmt in [('events.ndk', 'NDK'),
                    ('events.txt', 'ZMAP'),
                    ('events.kml', 'KML')]:
    events_catalog.write(target, format=fmt)

# --- Station-metadata conversion ---
station_inventory = read_inventory('stations.xml')
for target, fmt in [('stations.pz', 'SACPZ'),
                    ('stations.txt', 'STATIONTXT'),
                    ('stations.kml', 'KML')]:
    station_inventory.write(target, format=fmt)

Advanced Format Options

from obspy import read, Stream
from obspy.io.mseed import InternalMSEEDError

# Read MiniSEED with detailed options, catching low-level library errors.
try:
    stream = read('data.mseed',
                  apply_calib=True,        # Apply calibration
                  check_compression=True,  # Verify integrity
                  details=True,            # Get detailed info
                  headonly=False)          # Read full data
except InternalMSEEDError as e:
    print(f"MiniSEED error: {e}")

# SAC format with header debugging
stream = read('data.sac', debug_headers=True, checksum=True)

# Write MiniSEED with STEIM2 compression, 512-byte big-endian records.
stream.write('compressed.mseed', format='MSEED',
             encoding='STEIM2',
             reclen=512,
             byteorder='>',
             flush=True)

# Write SAC with little-endian byte order
stream.write('output.sac', format='SAC', byteorder='little')

Bulk File Processing

import glob
from obspy import read, Stream

# Process multiple files efficiently
all_files = glob.glob('seismic_data/*.mseed')
master_stream = Stream()

for filename in all_files:
    try:
        st = read(filename)
        master_stream += st  # Concatenate streams
    except Exception as e:
        # Report the offending file by name (the original snippet printed a
        # literal "(unknown)" placeholder instead of the filename).
        print(f"Error reading {filename}: {e}")

# Merge and clean up
master_stream.merge(method=1)  # Merge overlapping traces
master_stream.sort()           # Sort by time and metadata

# Write combined dataset
master_stream.write('combined_data.mseed', format='MSEED')

# Split by station and write one file per NET.STA pair.
for network_station in {f"{tr.stats.network}.{tr.stats.station}"
                        for tr in master_stream}:
    net, sta = network_station.split('.')
    st_station = master_stream.select(network=net, station=sta)
    st_station.write(f'{net}_{sta}_data.mseed', format='MSEED')

Format-Specific Error Handling

from obspy import read
from obspy.io.mseed import ObsPyMSEEDError, ObsPyMSEEDFilesizeTooSmallError
from obspy.io.sac import SacIOError
from obspy.core.util import ObsPyReadingError

def robust_read(filename):
    """Read a waveform file, degrading gracefully on format errors.

    Args:
        filename: Path to the waveform file to read.

    Returns:
        Stream on success, or None when the file cannot be read.
    """
    # The original example printed a literal "(unknown)" placeholder in every
    # message below; all messages now interpolate the actual filename.
    try:
        return read(filename)

    except ObsPyMSEEDFilesizeTooSmallError:
        print(f"MiniSEED file {filename} is too small")
        return None

    except ObsPyMSEEDError as e:
        print(f"MiniSEED error in {filename}: {e}")
        # Retry once with relaxed integrity checks before giving up.
        try:
            return read(filename, check_compression=False)
        except Exception:  # narrow from bare except: don't swallow SystemExit
            return None

    except SacIOError as e:
        print(f"SAC format error in {filename}: {e}")
        return None

    except ObsPyReadingError as e:
        print(f"General reading error in {filename}: {e}")
        return None

    except Exception as e:
        print(f"Unexpected error reading {filename}: {e}")
        return None

# Use robust reader
filenames = ['file1.mseed', 'file2.sac', 'file3.segy']
streams = [robust_read(f) for f in filenames]
streams = [s for s in streams if s is not None]  # Filter out failures

Types

# Format detection result.
# NOTE: illustrative schema only -- the dict values are the *types* each
# field is expected to have, not example values.
FormatInfo = {
    'format': str,           # Detected format name
    'confidence': float,     # Detection confidence (0-1)
    'extensions': list[str], # Typical file extensions
    'description': str       # Format description
}

# File header information (format-dependent).
# NOTE(review): UTCDateTime is not imported in this snippet -- presumably
# obspy.core.UTCDateTime; this literal is documentation, not runnable code.
HeaderInfo = {
    'sampling_rate': float,  # Sampling rate in Hz
    'npts': int,            # Number of data points
    'starttime': UTCDateTime, # Start time
    'network': str,         # Network code
    'station': str,         # Station code
    'channel': str,         # Channel code
    # Additional format-specific fields
}

Install with Tessl CLI

npx tessl i tessl/pypi-obspy

docs

core-data-structures.md

data-center-clients.md

file-format-io.md

geodetic-calculations.md

index.md

signal-processing.md

travel-time-calculations.md

visualization-imaging.md

tile.json