Simple & fast IO for SEG-Y files
—
Helper functions for format conversion, metadata extraction, array-to-SEG-Y conversion, and common seismic processing operations. These utilities simplify common workflows and provide high-level convenience functions.
Functions for working with sample rates, timing information, and creating sample indices.
def dt(f, fallback_dt=4000.0):
    """
    Infer sample rate from SEG-Y file.

    Parameters:
    - f (SegyFile): Open SEG-Y file handle
    - fallback_dt (float): Fallback sample rate in microseconds, used if the
      sample rate cannot be determined from the file

    Returns:
    float: Sample interval in microseconds

    Raises:
    ValueError: If sample rate cannot be determined and no fallback provided
    """
def sample_indexes(segyfile, t0=0.0, dt_override=None):
"""
Create sample index list at specified depth/time values.
Parameters:
- segyfile (SegyFile): Open SEG-Y file handle
- t0 (float): Start time in milliseconds, default 0.0
- dt_override (float): Override sample rate in milliseconds
Returns:
numpy.ndarray: Array of sample indices/times
"""Usage Example:
import segyio
import numpy as np
with segyio.open('data.sgy') as f:
# Get sample rate
sample_rate = segyio.dt(f)
print(f"Sample rate: {sample_rate} microseconds ({sample_rate/1000} ms)")
# Create time axis
times = segyio.sample_indexes(f, t0=0.0)
print(f"Time range: {times[0]} to {times[-1]} ms")
# Create custom time axis with different start time
delayed_times = segyio.sample_indexes(f, t0=100.0) # Start at 100ms
# Override sample rate
custom_times = segyio.sample_indexes(f, dt_override=2.0) # 2ms samplingUtilities for creating and formatting SEG-Y textual headers.
def create_text_header(lines):
"""
Format textual header from line dictionary.
Parameters:
- lines (dict): Line number to text mapping (1-40)
Returns:
str: Formatted EBCDIC header string (3200 bytes)
Raises:
ValueError: Invalid line numbers or content
"""Usage Example:
import segyio
# Create custom textual header
header_lines = {
1: "C 1 CLIENT: EXAMPLE SEISMIC COMPANY",
2: "C 2 DATA TYPE: MIGRATED 3D SEISMIC DATA",
3: "C 3 AREA: NORTH SEA BLOCK 123/45",
4: "C 4 ",
5: "C 5 PROCESSING SEQUENCE:",
6: "C 6 1. DEMULTIPLE",
7: "C 7 2. DECONVOLUTION",
8: "C 8 3. VELOCITY ANALYSIS",
9: "C 9 4. DMO CORRECTION",
10: "C10 5. POST-STACK TIME MIGRATION",
11: "C11 ",
12: "C12 DATUM: MEAN SEA LEVEL",
13: "C13 COORDINATE SYSTEM: UTM ZONE 31N",
14: "C14 SAMPLE RATE: 4 MS",
15: "C15 RECORD LENGTH: 2000 MS"
}
text_header = segyio.create_text_header(header_lines)
# Use in file creation
spec = segyio.spec()
# ... configure spec ...
with segyio.create('output.sgy', spec) as f:
f.text[0] = text_headerFunctions for converting between different data formats and handling IBM floating point.
def native(data, format=SegySampleFormat.IBM_FLOAT_4_BYTE, copy=True):
"""
Convert numpy array to native float format.
Parameters:
- data (numpy.ndarray): Input data array
- format (int or SegySampleFormat): Source data format
- copy (bool): Whether to copy data or modify in-place
Returns:
numpy.ndarray: Data converted to native float format
Raises:
ValueError: Unsupported format or invalid data
"""Usage Example:
import segyio
import numpy as np
with segyio.open('ibm_data.sgy') as f:
# Read trace with IBM float format
ibm_trace = f.trace[0]
# Convert to native IEEE float
ieee_trace = segyio.native(ibm_trace, segyio.SegySampleFormat.IBM_FLOAT_4_BYTE)
# Check format in binary header
format_code = f.bin[segyio.BinField.Format]
if format_code == segyio.SegySampleFormat.IBM_FLOAT_4_BYTE:
print("Converting from IBM to IEEE float")
converted = segyio.native(ibm_trace, format_code)
else:
converted = ibm_trace # Already in native formatFunctions for collecting and combining trace data into larger arrays.
def collect(itr):
    """
    Collect iterable of traces/lines into single ndarray.

    Parameters:
    - itr (iterable): Iterable of numpy.ndarray objects (traces, lines, etc.)

    Returns:
    numpy.ndarray: Combined array with additional dimension

    Raises:
    ValueError: Inconsistent array shapes
    """
def cube(f):
"""
Read full 3D cube from file into memory.
Parameters:
- f (str or SegyFile): File path or open file handle
Returns:
numpy.ndarray: 3D array (inlines, crosslines, samples)
Raises:
ValueError: File is not 3D structured
MemoryError: Insufficient memory for cube
"""Usage Example:
import segyio
import numpy as np
# Read and collect multiple inlines
with segyio.open('3d_data.sgy') as f:
# Collect first 10 inlines into 3D array
inline_list = [f.iline[il] for il in f.ilines[:10]]
collected_inlines = segyio.collect(inline_list)
print(f"Collected shape: {collected_inlines.shape}")
# Read full cube (memory intensive!)
full_cube = segyio.cube(f)
print(f"Full cube shape: {full_cube.shape}")
# Collect traces with processing
with segyio.open('data.sgy') as f:
def process_trace(trace):
# Apply some processing
return trace * 2.0 # Simple gain
processed_traces = segyio.collect(
process_trace(f.trace[i]) for i in range(min(100, f.tracecount))
)Functions for extracting survey metadata and geometry information.
def rotation(f, line='fast'):
    """
    Find rotation angle of survey.

    Parameters:
    - f (SegyFile): Open SEG-Y file handle
    - line (str): 'fast' or 'slow' dimension

    Returns:
    tuple: (rotation, cdpx, cdpy) - the clockwise rotation of the line in
    radians, followed by the CDP-X / CDP-Y coordinates of the line's origin
    """
def metadata(f):
"""
Extract survey metadata as spec object.
Parameters:
- f (str or SegyFile): File path or open file handle
Returns:
segyio.spec: Specification object containing file metadata
"""Usage Example:
import segyio
# Analyze survey rotation
with segyio.open('3d_survey.sgy') as f:
rotation_angle, center_x, center_y = segyio.rotation(f)
print(f"Survey rotation: {rotation_angle:.2f} degrees")
print(f"Survey center: ({center_x:.2f}, {center_y:.2f})")
# Extract complete metadata
survey_spec = segyio.metadata('survey.sgy')
print(f"Inline range: {survey_spec.ilines}")
print(f"Crossline range: {survey_spec.xlines}")
print(f"Sample count: {len(survey_spec.samples)}")
print(f"Data format: {survey_spec.format}")Functions for creating SEG-Y files from numpy arrays with various geometries.
def from_array(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create SEG-Y file from n-dimensional numpy array.

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): Input data array
    - iline (int): Inline header field number
    - xline (int): Crossline header field number
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file on disk
    """
def from_array2D(filename, data, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create SEG-Y file from 2D array (shot records).

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): 2D array (traces, samples)
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file from 2D geometry
    """
def from_array3D(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
    """
    Create SEG-Y file from 3D array (post-stack volume).

    Parameters:
    - filename (str): Output file path
    - data (numpy.ndarray): 3D array (inlines, crosslines, samples)
    - iline (int): Inline header field number
    - xline (int): Crossline header field number
    - format (SegySampleFormat): Output data format
    - dt (int): Sample interval in microseconds
    - delrt (int): Delay recording time in milliseconds

    Returns:
    None: Creates file from 3D geometry
    """
def from_array4D(filename, data, iline=189, xline=193, format=SegySampleFormat.IBM_FLOAT_4_BYTE, dt=4000, delrt=0):
"""
Create SEG-Y file from 4D array (pre-stack volume).
Parameters:
- filename (str): Output file path
- data (numpy.ndarray): 4D array (inlines, crosslines, offsets, samples)
- iline (int): Inline header field number
- xline (int): Crossline header field number
- format (SegySampleFormat): Output data format
- dt (int): Sample interval in microseconds
- delrt (int): Delay recording time in milliseconds
Returns:
None: Creates file from 4D geometry
"""Usage Example:
import segyio
import numpy as np
# Create synthetic seismic data
ni, nx, nt = 50, 40, 500 # 50 inlines, 40 crosslines, 500 samples
synthetic_data = np.random.randn(ni, nx, nt).astype(np.float32)
# Add some realistic seismic features
for i in range(ni):
for j in range(nx):
# Add a reflector at 200ms
synthetic_data[i, j, 50:60] += 0.5 * np.sin(np.linspace(0, 4*np.pi, 10))
# Convert to SEG-Y
segyio.from_array3D(
'synthetic_3d.sgy',
synthetic_data,
format=segyio.SegySampleFormat.IEEE_FLOAT_4_BYTE,
dt=4000, # 4ms sampling
delrt=0 # Start at 0ms
)
# 2D seismic line (shot records)
shot_data = np.random.randn(120, 1000).astype(np.float32) # 120 traces, 1000 samples
segyio.from_array2D('shot_record.sgy', shot_data, dt=2000) # 2ms sampling
# 4D pre-stack data
prestack_data = np.random.randn(30, 25, 5, 750).astype(np.float32) # 5 offsets
segyio.from_array4D('prestack_4d.sgy', prestack_data, dt=4000)Destructive resampling operations for changing sample rates and timing.
def resample(f, rate=None, delay=None, micro=False, trace=True, binary=True):
"""
Resample file traces (destructive operation).
Parameters:
- f (SegyFile): File handle (must be writable)
- rate (float): New sample rate
- delay (float): New delay time
- micro (bool): Whether rate/delay are in microseconds
- trace (bool): Update trace headers
- binary (bool): Update binary header
Returns:
SegyFile: Modified file handle
Raises:
ValueError: Invalid resampling parameters
RuntimeError: File not writable
"""Usage Example:
import segyio
# Resample file from 4ms to 2ms
with segyio.open('input_4ms.sgy', 'r+') as f:
print(f"Original sample rate: {segyio.dt(f)/1000} ms")
# Resample to 2ms (note: this is destructive!)
segyio.resample(f, rate=2.0, micro=False) # 2ms
print(f"New sample rate: {segyio.dt(f)/1000} ms")
# Update delay time to 100ms
segyio.resample(f, delay=100.0, micro=False)def qc_workflow(input_file, output_report):
"""Complete QC workflow for SEG-Y files."""
report = {}
with segyio.open(input_file) as f:
# Basic file information
report['filename'] = input_file
report['traces'] = f.tracecount
report['samples'] = len(f.samples)
report['sample_rate'] = segyio.dt(f) / 1000 # ms
# Geometry analysis
if not f.unstructured:
report['inlines'] = len(f.ilines)
report['crosslines'] = len(f.xlines)
report['offsets'] = len(f.offsets)
# Survey rotation
rotation_angle, _, _ = segyio.rotation(f)
report['rotation'] = rotation_angle
# Amplitude analysis
sample_traces = [f.trace[i] for i in range(0, f.tracecount, max(1, f.tracecount//100))]
all_samples = np.concatenate(sample_traces)
report['amplitude_stats'] = {
'min': float(all_samples.min()),
'max': float(all_samples.max()),
'mean': float(all_samples.mean()),
'std': float(all_samples.std())
}
# Save report
import json
with open(output_report, 'w') as f:
json.dump(report, f, indent=2)
return reportdef convert_to_ieee(input_file, output_file):
"""Convert IBM float SEG-Y to IEEE float format."""
with segyio.open(input_file) as src:
# Check current format
current_format = src.bin[segyio.BinField.Format]
if current_format != segyio.SegySampleFormat.IBM_FLOAT_4_BYTE:
print("File is not IBM float format")
return
# Create new file spec
spec = segyio.metadata(src)
spec.format = segyio.SegySampleFormat.IEEE_FLOAT_4_BYTE
with segyio.create(output_file, spec) as dst:
# Copy and convert data
for i in range(src.tracecount):
ibm_trace = src.trace[i]
ieee_trace = segyio.native(ibm_trace, current_format)
dst.trace[i] = ieee_trace
# Copy headers
dst.header[i] = src.header[i]
# Copy text headers
for i in range(src.ext_headers + 1):
dst.text[i] = src.text[i]
# Update binary header
for field in [segyio.BinField.Samples, segyio.BinField.Interval,
segyio.BinField.Traces, segyio.BinField.Format]:
dst.bin[field] = src.bin[field]
# Set new format
dst.bin[segyio.BinField.Format] = segyio.SegySampleFormat.IEEE_FLOAT_4_BYTEInstall with Tessl CLI
npx tessl i tessl/pypi-segyio