CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-segyio

Simple & fast IO for SEG-Y files

Pending
Overview
Eval results
Files

data-access.mddocs/

Data Access

Multiple specialized interfaces for accessing seismic data organized by traces, inlines, crosslines, depth slices, and gathers. Each access mode provides a different view of the same underlying data optimized for specific workflows.

Capabilities

Individual Trace Access

Direct access to individual seismic traces by trace index. This is the most fundamental access pattern.

# Trace access mode (available as f.trace)
f.trace[index]           # Read trace by index (returns numpy.ndarray)
f.trace[start:end]       # Read multiple traces (returns numpy.ndarray) 
f.trace[start:end:step]  # Read traces with step (returns numpy.ndarray)
f.trace[index] = data    # Write trace data (if file writable)

Usage Example:

# Assumes `import segyio` and `import numpy as np` at the top of the script.
with segyio.open('data.sgy') as f:
    # Read single trace (returns a 1D numpy array of samples)
    first_trace = f.trace[0]
    print(f"Trace shape: {first_trace.shape}")
    print(f"Trace dtype: {first_trace.dtype}")
    
    # Read multiple traces at once via slicing
    traces_10_to_19 = f.trace[10:20]
    print(f"Multiple traces shape: {traces_10_to_19.shape}")
    
    # Read every 10th trace
    sampled_traces = f.trace[::10]
    
    # Iterate over all traces in file order
    for i, trace in enumerate(f.trace):
        # Process each trace
        amplitude = trace.max()
        print(f"Trace {i} max amplitude: {amplitude}")
        
        if i >= 100:  # Limit for example
            break

# Writing traces requires opening the file in read-write mode ('r+')
with segyio.open('data.sgy', 'r+') as f:
    # Modify a trace
    modified_trace = f.trace[0] * 2.0  # Double amplitudes
    f.trace[0] = modified_trace
    
    # Write synthetic data; length must match the file's sample count
    synthetic = np.sin(np.linspace(0, 10*np.pi, len(f.samples)))
    f.trace[0] = synthetic

Inline Access

Access seismic data organized by inline number (3D structured files only). Each inline represents a 2D vertical slice parallel to the acquisition direction.

# Inline access mode (available as f.iline)
f.iline[inline_number]              # Read inline slice (returns 2D numpy.ndarray)
f.iline[inline_number, crosslines]  # Read subset of crosslines
f.iline[inline_number] = data       # Write inline data (if file writable)

Usage Example:

# Assumes `import segyio` and `import numpy as np` at the top of the script.
with segyio.open('3d_volume.sgy') as f:
    print(f"Available inlines: {f.ilines}")
    
    # Read complete inline
    inline_100 = f.iline[100]
    print(f"Inline shape: {inline_100.shape}")  # (n_crosslines, n_samples)
    
    # Read subset of crosslines for an inline
    partial_inline = f.iline[100, 200:250]  # Crosslines 200-249
    
    # Iterate over all inlines
    for il in f.ilines:
        inline_data = f.iline[il]
        avg_amplitude = inline_data.mean()
        print(f"Inline {il} average amplitude: {avg_amplitude}")
    
    # Process inline with numpy operations
    inline_data = f.iline[150]
    
    # Apply gain correction
    gained = inline_data * 1.5
    
    # Apply AGC (Automatic Gain Control)
    # Each ROW of inline_data is one trace of n_samples values, so iterate
    # the rows directly.  (The previous version iterated `inline_data.T`,
    # which made `trace` a constant-time slice across crosslines and then
    # indexed agc_data with the axes swapped — a shape/semantics bug.)
    window_size = 50
    agc_data = np.zeros_like(inline_data)
    for i, trace in enumerate(inline_data):
        for j in range(len(trace)):
            # Sliding RMS window centred (approximately) on sample j
            start = max(0, j - window_size//2)
            end = min(len(trace), j + window_size//2)
            window_rms = np.sqrt(np.mean(trace[start:end]**2))
            if window_rms > 0:
                agc_data[i, j] = trace[j] / window_rms

Crossline Access

Access seismic data organized by crossline number (3D structured files only). Each crossline represents a 2D vertical slice perpendicular to the acquisition direction.

# Crossline access mode (available as f.xline)
f.xline[crossline_number]            # Read crossline slice (returns 2D numpy.ndarray)
f.xline[crossline_number, inlines]   # Read subset of inlines
f.xline[crossline_number] = data     # Write crossline data (if file writable)

Usage Example:

# Assumes `import segyio` and `import numpy as np` at the top of the script.
with segyio.open('3d_volume.sgy') as f:
    print(f"Available crosslines: {f.xlines}")
    
    # Read complete crossline
    crossline_300 = f.xline[300]
    print(f"Crossline shape: {crossline_300.shape}")  # (n_inlines, n_samples)
    
    # Compare inline vs crossline
    il_data = f.iline[100]
    xl_data = f.xline[300]
    
    print(f"Inline 100 dimensions: {il_data.shape}")
    print(f"Crossline 300 dimensions: {xl_data.shape}")
    
    # Find intersection point
    # NOTE(review): an `f.index(...)` helper is not present in all segyio
    # releases — verify against the installed version before relying on it.
    intersection = f.trace[f.index(inline=100, crossline=300)]
    
    # Extract same trace from both slices
    # Find position of crossline 300 in inline 100
    xl_pos = list(f.xlines).index(300)
    trace_from_inline = il_data[xl_pos, :]
    
    # Find position of inline 100 in crossline 300  
    il_pos = list(f.ilines).index(100)
    trace_from_crossline = xl_data[il_pos, :]
    
    # These should be identical: both are the trace at (IL=100, XL=300)
    assert np.allclose(trace_from_inline, trace_from_crossline)

Fast and Slow Dimension Access

Access data along the fast and slow dimensions based on file sorting. Provides consistent access regardless of whether file is inline-sorted or crossline-sorted.

# Fast/slow dimension access (available as f.fast and f.slow)
f.fast[index]       # Read along fast dimension
f.slow[index]       # Read along slow dimension

Usage Example:

# Assumes `import segyio` at the top of the script.
with segyio.open('data.sgy') as f:
    # f.sorting reports whether traces are laid out inline- or
    # crossline-contiguous on disk
    print(f"File sorting: {f.sorting}")
    
    if f.sorting == segyio.TraceSortingFormat.INLINE_SORTING:
        print("Fast=inline, Slow=crossline")
        assert f.fast == f.iline
        assert f.slow == f.xline
    elif f.sorting == segyio.TraceSortingFormat.CROSSLINE_SORTING:
        print("Fast=crossline, Slow=inline")
        assert f.fast == f.xline
        assert f.slow == f.iline
    
    # Access first line along fast dimension (contiguous on disk, cheapest)
    fast_line = f.fast[f.fast.indices[0]]
    
    # Access first line along slow dimension (strided reads, slower)
    slow_line = f.slow[f.slow.indices[0]]

Depth Slice Access

Access horizontal slices at specific sample indices (time/depth slices). Useful for horizon mapping and amplitude analysis.

# Depth slice access mode (available as f.depth_slice)
f.depth_slice[sample_index]          # Read horizontal slice at sample index
f.depth_slice[sample_index] = data   # Write depth slice (if file writable)

Usage Example:

# Assumes `import segyio` and `import numpy as np` at the top of the script.
with segyio.open('3d_volume.sgy') as f:
    print(f"Sample range: {f.samples}")
    
    # Read depth slice at 500ms (sample index 125 for 4ms sampling)
    sample_index = 125
    depth_slice = f.depth_slice[sample_index]
    print(f"Depth slice shape: {depth_slice.shape}")  # (n_inlines, n_crosslines)
    
    # Create amplitude map for interpretation
    import matplotlib.pyplot as plt
    
    plt.figure(figsize=(10, 8))
    plt.imshow(depth_slice, cmap='seismic', aspect='auto')
    plt.title(f'Amplitude at {f.samples[sample_index]}ms')
    plt.xlabel('Crossline')
    plt.ylabel('Inline') 
    plt.colorbar(label='Amplitude')
    
    # Find maximum amplitude location
    max_pos = np.unravel_index(np.argmax(np.abs(depth_slice)), depth_slice.shape)
    max_inline = f.ilines[max_pos[0]]
    max_crossline = f.xlines[max_pos[1]]
    print(f"Maximum amplitude at IL={max_inline}, XL={max_crossline}")
    
    # Extract amplitude attributes along a horizon
    horizon_samples = np.full(depth_slice.shape, sample_index)  # Flat horizon
    
    # For variable horizon, load from interpretation
    # horizon_samples = load_horizon_picks('horizon.dat')
    
    # Extract attributes along the horizon.  Cache each depth slice so a
    # given sample index is read from the file only ONCE — the previous
    # version re-read the entire 2D slice for every single (inline,
    # crossline) cell, i.e. O(n_il * n_xl) full-slice reads.
    slice_cache = {}
    horizon_amplitudes = np.zeros(depth_slice.shape)
    for i, il in enumerate(f.ilines):
        for j, xl in enumerate(f.xlines):
            sample_idx = int(horizon_samples[i, j])
            if 0 <= sample_idx < len(f.samples):
                if sample_idx not in slice_cache:
                    slice_cache[sample_idx] = f.depth_slice[sample_idx]
                horizon_amplitudes[i, j] = slice_cache[sample_idx][i, j]

Gather Access

Access pre-stack gathers organized by Common Depth Point (CDP) or other grouping criteria. Essential for velocity analysis and pre-stack processing.

# Gather access mode (available as f.gather)
f.gather[cdp_number]              # Read gather by CDP number
f.gather[cdp_number, offsets]     # Read subset of offsets

Usage Example:

# Pre-stack data with multiple offsets
# Assumes `import segyio` and `import numpy as np` at the top of the script.
with segyio.open('prestack.sgy') as f:
    print(f"Available offsets: {f.offsets}")
    
    # Get first CDP number
    first_cdp = f.header[0][segyio.TraceField.CDP]
    
    # Read complete gather  
    gather = f.gather[first_cdp]
    print(f"Gather shape: {gather.shape}")  # (n_offsets, n_samples)
    
    # Read subset of offsets
    near_offsets = f.gather[first_cdp, f.offsets[:3]]  # First 3 offsets
    
    # Velocity analysis - compute semblance
    def compute_semblance(gather, times, velocities):
        """Compute semblance for velocity analysis.

        Returns an array of shape (len(times), len(velocities)).

        NOTE(review): deliberately simplified for illustration —
        * nearest-sample lookup, no proper NMO interpolation;
        * sample interval hard-coded to 4 ms (see `1000 / 4` below) —
          confirm against the file's actual sampling;
        * the classic semblance formula also divides by the number of
          traces N; this version omits the 1/N factor, so values are
          scaled by N relative to textbook semblance.
        """
        semblance = np.zeros((len(times), len(velocities)))
        
        for i, t in enumerate(times):
            for j, v in enumerate(velocities):
                # NMO correction
                # `corrected` is only populated in column i for this (t, v)
                # pair; all other samples remain zero.
                corrected = np.zeros_like(gather)
                for k, offset in enumerate(f.offsets):
                    # Hyperbolic moveout travel time for this offset/velocity
                    t_nmo = np.sqrt(t**2 + (offset/v)**2)
                    # Interpolate to corrected time
                    # (simplified - would use proper interpolation)
                    sample_idx = int(t_nmo * 1000 / 4)  # 4ms sampling
                    if sample_idx < gather.shape[1]:
                        corrected[k, i] = gather[k, sample_idx]
                
                # Compute semblance
                stack = corrected.sum(axis=0)
                energy_stack = (stack**2).sum()
                energy_individual = (corrected**2).sum()
                
                if energy_individual > 0:
                    semblance[i, j] = energy_stack / energy_individual
        
        return semblance
    
    # Example velocity analysis
    times = np.arange(0, 2.0, 0.004)  # 0-2 seconds
    velocities = np.arange(1500, 4000, 50)  # 1500-4000 m/s
    
    semblance = compute_semblance(gather, times, velocities)
    
    # Find maximum velocity at each time (argmax over the velocity axis)
    optimal_velocities = velocities[np.argmax(semblance, axis=1)]

Multi-dimensional Indexing

Combined indexing across multiple dimensions for precise data access.

# Assumes `import segyio` at the top of the script.
with segyio.open('3d_volume.sgy') as f:
    # Get trace index from inline/crossline coordinates
    # NOTE(review): an `f.index(...)` helper is not present in all segyio
    # releases — verify against the installed version before relying on it.
    trace_idx = f.index(inline=100, crossline=200)
    trace = f.trace[trace_idx]
    
    # Alternative: direct access via coordinates
    trace = f.trace[f.index(100, 200)]
    
    # Bulk operations on subsets
    # NOTE(review): in segyio, slicing a line mode yields an iterator of 2D
    # arrays rather than a single stacked array — confirm expected type.
    inline_subset = f.iline[100:110]  # Multiple inlines
    crossline_subset = f.xline[200:210]  # Multiple crosslines
    
    # Advanced slicing
    subvolume = f.iline[100:110, 200:210]  # Sub-cube extraction

Data Processing Examples

Amplitude Statistics

def compute_amplitude_stats(filename, step=100):
    """Compute amplitude statistics for SEG-Y file.

    Samples every ``step``-th trace (default 100, matching the original
    behavior) instead of reading the whole file, keeping the scan fast
    on large volumes.

    Args:
        filename: Path to the SEG-Y file.
        step: Stride between sampled traces; pass 1 to use every trace.

    Returns:
        dict with 'min', 'max', 'mean', 'std' and 'rms' of the sampled
        amplitudes.
    """
    with segyio.open(filename) as f:
        # Collect whole traces and concatenate once — avoids growing a
        # Python list sample-by-sample via extend(), which boxes every
        # float and is far slower for large files.
        sampled = [np.asarray(f.trace[i]) for i in range(0, f.tracecount, step)]
        amplitudes = np.concatenate(sampled)

        return {
            'min': amplitudes.min(),
            'max': amplitudes.max(), 
            'mean': amplitudes.mean(),
            'std': amplitudes.std(),
            'rms': np.sqrt(np.mean(amplitudes**2))
        }

Data QC and Validation

def qc_seismic_data(filename, max_traces=1000, clip_level=32767):
    """Perform basic QC on seismic data.

    Checks performed (on up to ``max_traces`` leading traces):
      * dead traces — all samples zero or all NaN;
      * clipped traces — any |amplitude| >= ``clip_level``;
      * geometry — for structured files, trace count vs
        len(ilines) * len(xlines) * len(offsets).

    Args:
        filename: Path to the SEG-Y file.
        max_traces: Upper bound on traces inspected (default 1000).
        clip_level: Amplitude treated as clipping (default 32767).

    Returns:
        list[str]: descriptions of issues found; empty if none.
    """
    issues = []
    
    with segyio.open(filename) as f:
        dead_traces = []
        clipped_traces = []
        
        # Single pass: the original looped over the same traces twice,
        # reading every trace from disk once per check.
        for i in range(min(max_traces, f.tracecount)):
            trace = f.trace[i]
            # Dead trace: entirely zero or entirely NaN
            if np.all(trace == 0) or np.all(np.isnan(trace)):
                dead_traces.append(i)
            # Clipped trace: any sample at or beyond the clipping level
            if np.any(np.abs(trace) >= clip_level):
                clipped_traces.append(i)
        
        if dead_traces:
            issues.append(f"Dead traces found: {len(dead_traces)}")
        
        if clipped_traces:
            issues.append(f"Clipped traces found: {len(clipped_traces)}")
        
        # Check geometry consistency (structured 3D files only)
        if not f.unstructured:
            expected_traces = len(f.ilines) * len(f.xlines) * len(f.offsets)
            if f.tracecount != expected_traces:
                issues.append(f"Trace count mismatch: {f.tracecount} vs {expected_traces}")
    
    return issues

Install with Tessl CLI

npx tessl i tessl/pypi-segyio

docs

data-access.md

file-operations.md

header-access.md

index.md

seismic-unix.md

utilities.md

tile.json