CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-segyio

Simple & fast IO for SEG-Y files

Pending
Overview
Eval results
Files

docs/utilities.md

Utilities

Helper functions for format conversion, metadata extraction, array-to-SEG-Y conversion, and routine seismic processing tasks. They wrap frequent workflows in high-level convenience functions.

Capabilities

Sample Rate and Timing

Functions for working with sample rates, timing information, and creating sample indices.

def dt(f, fallback_dt=4000.0):
    """
    Infer the sample interval of a SEG-Y file.

    Parameters:
    - f (SegyFile): Open SEG-Y file handle
    - fallback_dt (float): Sample interval in microseconds to return when none
      can be inferred from the file (default 4000.0, i.e. 4 ms)

    Returns:
    float: Sample interval in microseconds

    NOTE(review): this stub used to list "Raises ValueError ... no fallback
    provided", but fallback_dt always has a default, so that case cannot be
    reached through this signature -- confirm against the upstream
    segyio.tools.dt documentation.
    """

def sample_indexes(segyfile, t0=0.0, dt_override=None):
    """
    Build the sample positions (time or depth) of one trace.

    Values start at t0 and grow by one sample interval per sample, for as many
    samples as the file has per trace.

    Parameters:
    - segyfile (SegyFile): Open SEG-Y file handle
    - t0 (float): Value of the first sample (e.g. delay recording time),
      default 0.0
    - dt_override (float): Sample interval to use instead of the one inferred
      from the file; give it in the same unit as t0

    Returns:
    Sequence of float: One value per sample

    NOTE(review): upstream segyio.tools.sample_indexes appears to return a
    plain list, and when dt_override is None it falls back to dt(segyfile),
    which is in microseconds. Pass dt_override explicitly to get a millisecond
    axis -- confirm against the installed segyio version.
    """

Usage Example:

import segyio
import numpy as np

with segyio.open('data.sgy') as f:
    # Get sample rate; dt() reports the interval in microseconds
    sample_rate = segyio.dt(f)
    sample_rate_ms = sample_rate / 1000
    print(f"Sample rate: {sample_rate} microseconds ({sample_rate_ms} ms)")

    # Create time axis in milliseconds. The interval is passed explicitly so
    # the unit of the result does not depend on what sample_indexes() infers
    # from the file.
    times = segyio.sample_indexes(f, t0=0.0, dt_override=sample_rate_ms)
    print(f"Time range: {times[0]} to {times[-1]} ms")

    # Create custom time axis with different start time
    delayed_times = segyio.sample_indexes(f, t0=100.0, dt_override=sample_rate_ms)  # Start at 100ms

    # Override sample rate
    custom_times = segyio.sample_indexes(f, dt_override=2.0)  # 2ms sampling

Text Header Formatting

Utilities for creating and formatting SEG-Y textual headers.

def create_text_header(lines):
    """
    Format a SEG-Y textual header from a line-number-to-text mapping.

    Parameters:
    - lines (dict): Maps line number (1-40) to the text of that line; line
      numbers that are absent produce blank lines

    Returns:
    str: Header text of 40 lines x 80 columns (3200 characters)

    NOTE(review): upstream segyio.tools.create_text_header prefixes every line
    with "C##" itself and pads the text to 76 columns, so values should be the
    bare text (at most 76 characters) without a "C 1 " style prefix. It
    returns an ordinary Python str -- the EBCDIC encoding presumably happens
    when the header is written to a file -- and no ValueError for bad line
    numbers is documented. Confirm against the installed segyio version.
    """

Usage Example:

import segyio

# Create custom textual header
# create_text_header() adds the "C 1 ", "C 2 ", ... prefix to every line
# itself, so the values hold only the text (up to 76 characters per line).
header_lines = {
    1: "CLIENT: EXAMPLE SEISMIC COMPANY",
    2: "DATA TYPE: MIGRATED 3D SEISMIC DATA",
    3: "AREA: NORTH SEA BLOCK 123/45",
    4: "",
    5: "PROCESSING SEQUENCE:",
    6: "1. DEMULTIPLE",
    7: "2. DECONVOLUTION",
    8: "3. VELOCITY ANALYSIS",
    9: "4. DMO CORRECTION",
    10: "5. POST-STACK TIME MIGRATION",
    11: "",
    12: "DATUM: MEAN SEA LEVEL",
    13: "COORDINATE SYSTEM: UTM ZONE 31N",
    14: "SAMPLE RATE: 4 MS",
    15: "RECORD LENGTH: 2000 MS",
}

# Lines 16-40 are not listed and come out blank
text_header = segyio.create_text_header(header_lines)

# Use in file creation
spec = segyio.spec()
# ... configure spec ...

with segyio.create('output.sgy', spec) as f:
    f.text[0] = text_header

Data Format Conversion

Functions for converting between different data formats and handling IBM floating point.

def native(data, format=SegySampleFormat.IBM_FLOAT_4_BYTE, copy=True):
    """
    Convert raw SEG-Y sample data to native floats.

    Parameters:
    - data (numpy.ndarray): Raw sample data as stored in the file
    - format (int or SegySampleFormat): Format the raw data is stored in
    - copy (bool): If True, convert a copy and leave the input untouched;
      if False, convert in place

    Returns:
    numpy.ndarray: Data converted to native float format

    Raises:
    ValueError: Unsupported format or invalid data

    NOTE(review): upstream documents this for data read *outside* segyio
    (for example a numpy.memmap of the file). Traces obtained through
    f.trace[...] are already native floats and must not be passed through
    native() again -- confirm against the segyio.tools.native documentation.
    """

Usage Example:

import segyio
import numpy as np

with segyio.open('ibm_data.sgy') as f:
    # Traces read through segyio are already converted to native floats,
    # whatever the on-disk format is. Do NOT pass them to native(): a second
    # conversion would corrupt the samples.
    ieee_trace = f.trace[0]

    # Check format in binary header
    format_code = f.bin[segyio.BinField.Format]
    n_samples = len(f.samples)
    ext_headers = f.ext_headers

# native() is meant for sample data read outside segyio, e.g. a memory map.
# Trace data starts after the 3200-byte textual header, the 400-byte binary
# header and any 3200-byte extended textual headers.
data_start = 3600 + 3200 * ext_headers
raw_words = np.memmap('ibm_data.sgy', dtype=np.uint32, mode='r', offset=data_start)

# Skip the 240-byte trace header (60 four-byte words) of the first trace
raw_trace = raw_words[60:60 + n_samples]

if format_code == segyio.SegySampleFormat.IBM_FLOAT_4_BYTE:
    print("Converting from IBM to IEEE float")
    converted = segyio.native(raw_trace, format_code)
else:
    # NOTE(review): this layout only holds for 4-byte sample formats; other
    # formats need their own handling, so fall back to the trace segyio read
    converted = ieee_trace

Data Collection and Aggregation

Functions for collecting and combining trace data into larger arrays.

def collect(itr):
    """
    Collect an iterable of traces/lines into a single ndarray.

    Useful for turning the generators returned by sliced reads into one array.

    Parameters:
    - itr (iterable): Iterable of numpy.ndarray objects (traces, lines, etc.),
      all of the same shape

    Returns:
    numpy.ndarray: Combined array with one more dimension than the items
    (presumably stacked along a new first axis -- confirm upstream)

    Raises:
    ValueError: Inconsistent array shapes
    """

def cube(f):
    """
    Read a full cube from a file into memory.

    Parameters:
    - f (str or SegyFile): File path or open file handle

    Returns:
    numpy.ndarray: 3D array (inlines, crosslines, samples)

    Raises:
    ValueError: File is not 3D structured
    MemoryError: Insufficient memory for cube

    NOTE(review): upstream describes the axes as (fast, slow, samples), i.e.
    the first axis follows the file's sorting rather than always being
    inlines, and pre-stack files yield a 4D (fast, slow, offsets, samples)
    array -- confirm against the segyio.tools.cube documentation.
    """

Usage Example:

import segyio
import numpy as np

# Stack a handful of inlines, then load the whole volume
with segyio.open('3d_data.sgy') as f:
    # The first 10 inline numbers select which lines get stacked
    first_ten = f.ilines[:10]
    inline_list = [f.iline[number] for number in first_ten]
    collected_inlines = segyio.collect(inline_list)
    print(f"Collected shape: {collected_inlines.shape}")

    # Whole volume in one array -- this can need a lot of memory
    full_cube = segyio.cube(f)
    print(f"Full cube shape: {full_cube.shape}")

# Process traces on the fly while collecting them
with segyio.open('data.sgy') as f:
    def process_trace(trace):
        # Constant gain as a stand-in for real processing
        gain = 2.0
        return trace * gain

    trace_limit = min(100, f.tracecount)
    processed_traces = segyio.collect(
        process_trace(f.trace[index]) for index in range(trace_limit)
    )

Survey Metadata Extraction

Functions for extracting survey metadata and geometry information.

def rotation(f, line='fast'):
    """
    Find the rotation of the survey.

    Parameters:
    - f (SegyFile): Open SEG-Y file handle
    - line (str): Which line direction to measure, 'fast' or 'slow'

    Returns:
    tuple: (rotation_angle, cdpx, cdpy)

    NOTE(review): upstream segyio.tools.rotation documents the angle as a
    clockwise rotation in *radians* and (cdpx, cdpy) as the *origin* of the
    measured line, not the survey centre; it also accepts 'iline' and 'xline'
    for line. Confirm against the installed segyio version.
    """

def metadata(f):
    """
    Extract survey metadata as a spec object.

    The returned spec can be adjusted and handed to segyio.create() to make a
    new file with the same geometry.

    Parameters:
    - f (str or SegyFile): File path or open file handle

    Returns:
    segyio.spec: Specification object containing file metadata
    """

Usage Example:

import math

import segyio

# Analyze survey rotation
with segyio.open('3d_survey.sgy') as f:
    # rotation() lives in segyio.tools. Upstream documents the angle as
    # clockwise radians and the point as the origin of the measured line.
    rotation_angle, origin_x, origin_y = segyio.tools.rotation(f)
    print(f"Survey rotation: {math.degrees(rotation_angle):.2f} degrees")
    print(f"Survey origin: ({origin_x:.2f}, {origin_y:.2f})")

# Extract complete metadata (metadata() is also in segyio.tools)
survey_spec = segyio.tools.metadata('survey.sgy')
print(f"Inline range: {survey_spec.ilines}")
print(f"Crossline range: {survey_spec.xlines}")
print(f"Sample count: {len(survey_spec.samples)}")
print(f"Data format: {survey_spec.format}")

Array-to-SEG-Y Conversion

Functions for creating SEG-Y files from numpy arrays with various geometries.

def from_array(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create a SEG-Y file from an n-dimensional numpy array.

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): Input data array
    - iline (int): Trace header field holding the inline number
      (presumably the byte position; 189 is the SEG-Y rev1 location)
    - xline (int): Trace header field holding the crossline number
      (presumably the byte position; 193 is the SEG-Y rev1 location)
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file on disk
    """

def from_array2D(filename, data, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create a SEG-Y file from a 2D array (shot records).

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): 2D array (traces, samples)
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file from 2D geometry

    NOTE(review): the upstream signature appears to be
    from_array2D(filename, data, dt=4000, delrt=0) with no format parameter.
    Pass dt and delrt by keyword, and confirm whether format is accepted in
    the installed segyio version.
    """

def from_array3D(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create a SEG-Y file from a 3D array (post-stack volume).

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): 3D array (inlines, crosslines, samples)
    - iline (int): Trace header field holding the inline number
      (presumably the byte position; 189 is the SEG-Y rev1 location)
    - xline (int): Trace header field holding the crossline number
      (presumably the byte position; 193 is the SEG-Y rev1 location)
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file from 3D geometry
    """

def from_array4D(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create a SEG-Y file from a 4D array (pre-stack volume).

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): 4D array (inlines, crosslines, offsets, samples)
    - iline (int): Trace header field holding the inline number
      (presumably the byte position; 189 is the SEG-Y rev1 location)
    - xline (int): Trace header field holding the crossline number
      (presumably the byte position; 193 is the SEG-Y rev1 location)
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file from 4D geometry
    """

Usage Example:

import segyio
import numpy as np

# Create synthetic seismic data
ni, nx, nt = 50, 40, 500  # 50 inlines, 40 crosslines, 500 samples
synthetic_data = np.random.randn(ni, nx, nt).astype(np.float32)

# Add some realistic seismic features: a reflector starting at 200ms
# (sample 50 at 4ms sampling). The 10-sample wavelet is broadcast over every
# inline/crossline position in one assignment instead of a Python double loop.
reflector = 0.5 * np.sin(np.linspace(0, 4*np.pi, 10))
synthetic_data[:, :, 50:60] += reflector

# Convert to SEG-Y. The from_array* helpers live in the segyio.tools
# namespace, which is available after a plain "import segyio".
segyio.tools.from_array3D(
    'synthetic_3d.sgy',
    synthetic_data,
    format=segyio.SegySampleFormat.IEEE_FLOAT_4_BYTE,
    dt=4000,  # 4ms sampling
    delrt=0   # Start at 0ms
)

# 2D seismic line (shot records)
shot_data = np.random.randn(120, 1000).astype(np.float32)  # 120 traces, 1000 samples
segyio.tools.from_array2D('shot_record.sgy', shot_data, dt=2000)  # 2ms sampling

# 4D pre-stack data
prestack_data = np.random.randn(30, 25, 5, 750).astype(np.float32)  # 5 offsets
segyio.tools.from_array4D('prestack_4d.sgy', prestack_data, dt=4000)

File Resampling

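Resampling utilities that rewrite the sample interval and delay time stored in a file. The change is made in place on a writable file, so work on a copy if the original headers must be kept.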

def resample(f, rate=None, delay=None, micro=False, trace=True, binary=True):
    """
    Change the sample rate and/or delay recorded in a file (modifies the file).

    Parameters:
    - f (SegyFile): File handle (must be writable)
    - rate (float): New sample rate; None leaves it unchanged
    - delay (float): New delay time; None leaves it unchanged
    - micro (bool): Whether rate/delay are in microseconds
    - trace (bool): Update trace headers
    - binary (bool): Update binary header

    Returns:
    SegyFile: Modified file handle

    Raises:
    ValueError: Invalid resampling parameters
    RuntimeError: File not writable

    NOTE(review): upstream segyio.tools.resample documents that only header
    fields and the handle's sample axis are rewritten -- the trace samples are
    not interpolated -- and that delay is always in milliseconds regardless of
    micro. Confirm against the installed segyio version.
    """

Usage Example:

import segyio

# Relabel the sample interval of a file from 4ms to 2ms
with segyio.open('input_4ms.sgy', 'r+') as f:
    print(f"Original sample rate: {segyio.dt(f)/1000} ms")

    # resample() lives in segyio.tools. It writes to the file, and per the
    # upstream docs it updates header fields only -- the trace samples are
    # not interpolated.
    segyio.tools.resample(f, rate=2.0, micro=False)  # 2ms

    print(f"New sample rate: {segyio.dt(f)/1000} ms")

    # Update delay time to 100ms
    segyio.tools.resample(f, delay=100.0, micro=False)

Processing Workflows

Quality Control Pipeline

def qc_workflow(input_file, output_report):
    """
    Run a basic QC pass over a SEG-Y file and save the findings as JSON.

    Expects segyio and numpy (as np) to be imported at module level.

    Parameters:
    - input_file (str): Path of the SEG-Y file to inspect
    - output_report (str): Path of the JSON report to write

    Returns:
    dict: The report that was written. Geometry keys ('inlines',
    'crosslines', 'offsets', 'rotation') are present only for structured
    files; 'amplitude_stats' is None when no trace could be sampled.
    """
    import json

    report = {}

    with segyio.open(input_file) as f:
        # Basic file information
        report['filename'] = input_file
        report['traces'] = f.tracecount
        report['samples'] = len(f.samples)
        report['sample_rate'] = segyio.dt(f) / 1000  # ms

        # Geometry analysis
        if not f.unstructured:
            report['inlines'] = len(f.ilines)
            report['crosslines'] = len(f.xlines)
            report['offsets'] = len(f.offsets)

            # Survey rotation; rotation() lives in segyio.tools. Cast to a
            # plain float so json.dump never meets a numpy scalar.
            rotation_angle, _, _ = segyio.tools.rotation(f)
            report['rotation'] = float(rotation_angle)

        # Amplitude analysis on roughly 100 evenly spaced traces
        step = max(1, f.tracecount // 100)
        sample_traces = [f.trace[i] for i in range(0, f.tracecount, step)]

        if sample_traces:
            all_samples = np.concatenate(sample_traces)
            report['amplitude_stats'] = {
                'min': float(all_samples.min()),
                'max': float(all_samples.max()),
                'mean': float(all_samples.mean()),
                'std': float(all_samples.std())
            }
        else:
            # np.concatenate raises on an empty list; report "no data" instead
            report['amplitude_stats'] = None

    # Save report (separate name so the SEG-Y handle `f` is not shadowed)
    with open(output_report, 'w') as report_file:
        json.dump(report, report_file, indent=2)

    return report

Data Conversion Pipeline

def convert_to_ieee(input_file, output_file):
    """
    Convert an IBM-float SEG-Y file to IEEE float format.

    Expects segyio to be imported at module level. If the input is not stored
    as 4-byte IBM float, a message is printed and no output file is written.

    Parameters:
    - input_file (str): Path of the IBM-float SEG-Y file to read
    - output_file (str): Path of the IEEE-float SEG-Y file to create

    Returns:
    None
    """
    with segyio.open(input_file) as src:
        # Check current format
        current_format = src.bin[segyio.BinField.Format]

        if current_format != segyio.SegySampleFormat.IBM_FLOAT_4_BYTE:
            print("File is not IBM float format")
            return

        # Create new file spec (metadata() lives in segyio.tools)
        spec = segyio.tools.metadata(src)
        spec.format = segyio.SegySampleFormat.IEEE_FLOAT_4_BYTE

        with segyio.create(output_file, spec) as dst:
            # Copy text headers (main header plus any extended ones)
            for i in range(src.ext_headers + 1):
                dst.text[i] = src.text[i]

            # Update binary header, then record the new sample format
            for field in (segyio.BinField.Samples, segyio.BinField.Interval,
                          segyio.BinField.Traces):
                dst.bin[field] = src.bin[field]
            dst.bin[segyio.BinField.Format] = segyio.SegySampleFormat.IEEE_FLOAT_4_BYTE

            # Copy headers and data. segyio hands traces back as native
            # floats and writes them in the destination's format, so no
            # explicit conversion is needed; passing them through native()
            # would convert the samples a second time and corrupt them.
            for i in range(src.tracecount):
                dst.header[i] = src.header[i]
                dst.trace[i] = src.trace[i]

Install with Tessl CLI

npx tessl i tessl/pypi-segyio

docs

data-access.md

file-operations.md

header-access.md

index.md

seismic-unix.md

utilities.md

tile.json