tessl/pypi-pyedflib

Python library for reading and writing EDF+/BDF+ files used for storing biomedical signal data

—

Pending

Overview

Eval results

Files

High-Level Functions

Name: tessl/pypi-pyedflib
Author: tessl

Convenience functions for complete file operations, data conversion, file manipulation, and batch processing tasks. The pyedflib.highlevel module provides simplified interfaces for common EDF/BDF operations without requiring detailed knowledge of the low-level API.

Capabilities

Complete File I/O

Read and write entire EDF files with single function calls, handling header parsing and data extraction automatically.

def read_edf(edf_file: str, ch_nrs: Optional[List[int]] = None, 
             ch_names: Optional[List[str]] = None, digital: bool = False, 
             verbose: bool = False) -> Tuple[Union[np.ndarray, List[np.ndarray]], List[dict], dict]:
    """
    Read a complete EDF file: signal data, signal headers and file header.
    
    Parameters:
    - edf_file: str, path to EDF file
    - ch_nrs: List[int] or int or None, channel numbers to read (None = all)
    - ch_names: List[str] or str or None, channel names to read (None = all)
    - digital: bool, return digital values if True, physical values if False
      (default False)
    - verbose: bool, print progress information (default False)
    
    NOTE(review): the annotations declare only list types for ch_nrs and
    ch_names, while the descriptions above also allow a bare int / str.
    Confirm against the installed pyedflib version.
    
    Returns:
    Tuple (signals, signal_headers, header):
    - signals: numpy.ndarray or List[numpy.ndarray], signal data
    - signal_headers: List[dict], signal header information
    - header: dict, file header information
    """

def write_edf(edf_file: str, signals: Union[np.ndarray, List[np.ndarray]], signal_headers: List[Dict], 
              header: Optional[Dict] = None, digital: bool = False, 
              file_type: int = -1) -> bool:
    """
    Write complete EDF file with signal data and headers.
    
    Parameters:
    - edf_file: str, output file path
    - signals: numpy.ndarray or List[numpy.ndarray], signal data
      (channels x samples or samples x channels)
    - signal_headers: List[dict], signal header configurations
    - header: dict or None, file header information (default None)
    - digital: bool, whether signals contain digital values (default False)
    - file_type: int, EDF file type (FILETYPE_EDF, FILETYPE_EDFPLUS, etc.;
      default -1)
    
    NOTE(review): an earlier version of this text also listed a `block_size`
    parameter. It is not part of this signature and must not be passed.
    
    Returns:
    bool: NOTE(review) meaning not shown here, presumably True when the file
    was written. Confirm against the pyedflib documentation.
    """

def write_edf_quick(edf_file: str, signals: np.ndarray, sfreq: Union[float, int], 
                   digital: bool = False) -> bool:
    """
    Quick write EDF with minimal configuration (no explicit headers needed).
    
    Parameters:
    - edf_file: str, output file path
    - signals: numpy.ndarray, signal data
    - sfreq: float or int, sampling frequency applied to the written signals
    - digital: bool, whether signals are digital values (default False)
    
    Returns:
    bool: NOTE(review) meaning not shown here, presumably True when the file
    was written. Confirm against the pyedflib documentation.
    """

def read_edf_header(edf_file: str, read_annotations: bool = True) -> dict:
    """
    Read only header information from EDF file, without loading signal data.
    
    Parameters:
    - edf_file: str, path to EDF file
    - read_annotations: bool, whether to read annotations (default True)
    
    Returns:
    dict: a single dictionary holding the complete file header together with
    the signal headers and annotations (not a tuple)
    """

Usage examples:

import pyedflib.highlevel as hl
import numpy as np

# Load all channels together with the signal headers and the file header
signals, signal_headers, header = hl.read_edf('data.edf')
print(f"Read {len(signals)} channels, {signals[0].shape[0]} samples each")

# Restrict the read to a subset of channels, selected by index
wanted_indices = [0, 2, 4]
signals, sig_hdrs, hdr = hl.read_edf('data.edf', ch_nrs=wanted_indices)

# Restrict the read to a subset of channels, selected by label
wanted_names = ['EEG Fp1', 'EEG C3']
signals, sig_hdrs, hdr = hl.read_edf('data.edf', ch_names=wanted_names)

# Write a complete file: 4 channels x 2560 samples of random data,
# with one signal header per channel
n_channels, n_samples = 4, 2560
sample_data = np.random.randn(n_channels, n_samples)
sample_headers = []
for i in range(n_channels):
    sample_headers.append(hl.make_signal_header(f'CH{i}', sample_frequency=256))
hl.write_edf('output.edf', sample_data, sample_headers)

# Same data written through the shortcut that needs only a sampling rate
hl.write_edf_quick('quick.edf', sample_data, sfreq=256)

# Header-only read (fast): one dict carries the file-level fields and the
# per-signal headers
header_info = hl.read_edf_header('data.edf')
file_header = header_info
signal_headers = header_info['SignalHeaders']

Header Creation Utilities

Create properly formatted header dictionaries for file and signal configuration.

def make_header(technician: str = '', recording_additional: str = '', 
                patientname: str = '', patient_additional: str = '', 
                patientcode: str = '', equipment: str = '', admincode: str = '', 
                sex: str = '', startdate: Optional[datetime] = None, 
                birthdate: str = '') -> Dict:
    """
    Create file header dictionary with standard fields.
    
    Every parameter is optional. The string fields default to ''.
    
    Parameters:
    - technician: str, technician name
    - recording_additional: str, additional recording information
    - patientname: str, patient name
    - patient_additional: str, additional patient information
    - patientcode: str, patient identification code
    - equipment: str, recording equipment description
    - admincode: str, administration code
    - sex: str, patient sex ('M', 'F', or '')
    - startdate: datetime or None, recording start time (None = current time)
    - birthdate: str, patient birthdate in YYYY-MM-DD format
    
    Returns:
    dict: File header dictionary
    """

def make_signal_header(label: str, dimension: str = 'uV', sample_frequency: int = 256, 
                      physical_min: float = -200.0, physical_max: float = 200.0, 
                      digital_min: int = -32768, digital_max: int = 32767, 
                      transducer: str = '', prefilter: str = '') -> Dict:
    """
    Create the header dictionary for a single signal (channel).
    
    Only `label` is required. All other fields have defaults.
    
    Parameters:
    - label: str, signal label/name
    - dimension: str, physical units (e.g., 'uV', 'mV', 'V'; default 'uV')
    - sample_frequency: int, sampling rate in Hz (default 256)
    - physical_min: float, minimum physical value (default -200.0)
    - physical_max: float, maximum physical value (default 200.0)
    - digital_min: int, minimum digital value (default -32768)
    - digital_max: int, maximum digital value (default 32767)
    - transducer: str, transducer description (default '')
    - prefilter: str, prefilter description (default '')
    
    Returns:
    dict: Signal header dictionary
    """

def make_signal_headers(list_of_labels: List[str], dimension: str = 'uV', 
                       sample_frequency: Optional[Union[int, float]] = 256, 
                       physical_min: float = -200.0, physical_max: float = 200.0, 
                       digital_min: Union[float, int] = -32768, 
                       digital_max: Union[float, int] = 32767, 
                       transducer: str = '', prefilter: str = '') -> List[Dict]:
    """
    Create multiple signal headers from list of labels with common parameters.
    
    All parameters other than the labels are shared by every header created.
    
    Parameters:
    - list_of_labels: List[str], signal labels/names (plain strings)
    - dimension: str, physical units (e.g., 'uV', 'mV', 'V'; default 'uV')
    - sample_frequency: int or float or None, sampling rate in Hz
      (default 256; NOTE(review): the effect of None is not described here)
    - physical_min: float, minimum physical value (default -200.0)
    - physical_max: float, maximum physical value (default 200.0)
    - digital_min: int or float, minimum digital value (default -32768)
    - digital_max: int or float, maximum digital value (default 32767)
    - transducer: str, transducer description (default '')
    - prefilter: str, prefilter description (default '')
    
    Returns:
    List[dict]: Complete signal header dictionaries
    """

Usage examples:

# Create complete file header
file_hdr = hl.make_header(
    technician='Dr. Johnson',
    patientname='Subject_001',
    patientcode='S001',
    equipment='EEG-64 System',
    sex='F',
    birthdate='1990-05-15',
)

# Create signal header
sig_hdr = hl.make_signal_header(
    label='EEG Fp1',
    dimension='uV',
    sample_frequency=512,
    physical_min=-500,
    physical_max=500,
)

# Create multiple headers from labels
labels = ['EEG Fp1', 'EEG Fp2', 'EEG C3', 'EEG C4']
signal_headers = hl.make_signal_headers(labels, sample_frequency=256, dimension='uV')

# Channels with different settings: make_signal_headers takes plain label
# strings and applies the same parameters to all of them. A channel that
# deviates (here ECG: mV, 1000 Hz) is built with make_signal_header and
# combined with the rest.
headers = hl.make_signal_headers(['EEG Fp1', 'EMG'], sample_frequency=256)
ecg_header = hl.make_signal_header('ECG', dimension='mV', sample_frequency=1000)
headers.insert(1, ecg_header)  # resulting channel order: EEG Fp1, ECG, EMG

Data Conversion

Convert between digital and physical values using calibration parameters.

def dig2phys(signal: Union[np.ndarray, int], dmin: int, dmax: int, 
             pmin: float, pmax: float) -> Union[np.ndarray, float]:
    """
    Convert digital values to physical values.
    
    Note the argument order: the digital range (dmin, dmax) comes first, then
    the physical range (pmin, pmax). This is the reverse of phys2dig.
    
    Parameters:
    - signal: numpy.ndarray or int, digital signal values
    - dmin: int, digital minimum value
    - dmax: int, digital maximum value
    - pmin: float, physical minimum value  
    - pmax: float, physical maximum value
    
    Returns:
    numpy.ndarray or float: Physical values
    """

def phys2dig(signal: Union[np.ndarray, float], pmin: float, pmax: float, 
             dmin: int, dmax: int) -> Union[np.ndarray, int]:
    """
    Convert physical values to digital values.
    
    Note the argument order: the physical range (pmin, pmax) comes first, then
    the digital range (dmin, dmax). This is the reverse of dig2phys.
    
    Parameters:
    - signal: numpy.ndarray or float, physical signal values
    - pmin: float, physical minimum value
    - pmax: float, physical maximum value
    - dmin: int, digital minimum value
    - dmax: int, digital maximum value
    
    Returns:
    numpy.ndarray or int: Digital values
    """

Usage examples:

# Digital -> physical (e.g., ADC counts to microvolts); the ranges are
# passed by keyword so the digital/physical order cannot be mixed up
digital_data = np.array([1000, -500, 2000], dtype=np.int16)
physical_data = hl.dig2phys(
    digital_data, dmin=-32768, dmax=32767, pmin=-500.0, pmax=500.0
)

# Physical -> digital, same calibration ranges
eeg_signal = np.array([100.5, -75.2, 200.1])  # microvolts
digital_values = hl.phys2dig(
    eeg_signal, pmin=-500.0, pmax=500.0, dmin=-32768, dmax=32767
)

File Manipulation

Modify existing EDF files by dropping channels, cropping time ranges, renaming channels, and inverting signal polarity.

def drop_channels(edf_source: str, edf_target: Optional[str] = None,
                 to_keep: Optional[Union[List[str], List[int]]] = None,
                 to_drop: Optional[Union[List[str], List[int]]] = None,
                 verbose: bool = False) -> str:
    """
    Remove channels from EDF file and write the result to a new file.
    
    Parameters:
    - edf_source: str, source EDF file
    - edf_target: str or None, output file path (None = auto-generated)
    - to_keep: List[str or int] or None, channels to keep (overrides to_drop)
    - to_drop: List[str or int] or None, channels to remove (by name or index)
    - verbose: bool, print operation details (default False)
    
    Returns:
    str: Path to output file. Use this value when edf_target is None, since
    the path is then auto-generated.
    """

def crop_edf(edf_file: str, *, new_file: Optional[str] = None,
            start: Optional[Union[datetime, int, float]] = None,
            stop: Optional[Union[datetime, int, float]] = None,
            start_format: str = "datetime", stop_format: str = "datetime",
            verbose: bool = True) -> None:
    """
    Crop EDF file to desired time range.
    
    Everything after edf_file is keyword-only. To crop by offsets in seconds,
    pass numbers for start/stop AND set start_format/stop_format to
    "seconds", because both formats default to "datetime".
    
    Parameters:
    - edf_file: str, source EDF file
    - new_file: str or None, output file path (None = auto-generated)
    - start: datetime or int or float or None, new start time (datetime or seconds)
    - stop: datetime or int or float or None, new stop time (datetime or seconds)
    - start_format: str, format of start ("datetime" or "seconds")
    - stop_format: str, format of stop ("datetime" or "seconds")
    - verbose: bool, show progress (default True)
    
    Returns:
    None
    """

def rename_channels(edf_file: str, mapping: Union[Dict[str, str], List[str]], 
                   new_file: Optional[str] = None, verbose: bool = True) -> None:
    """
    Rename channels in EDF file.
    
    Parameters:
    - edf_file: str, path to input EDF file
    - mapping: dict or List[str], channel name mapping or new names list
    - new_file: str or None, output file path (None = modify in-place)
    - verbose: bool, print operation details (default True)
    
    Returns:
    None
    """

def change_polarity(edf_file: str, channels: Union[List[str], str], 
                   new_file: Optional[str] = None, verbose: bool = True) -> None:
    """
    Change signal polarity (multiply by -1) for specified channels.
    
    Parameters:
    - edf_file: str, path to input EDF file
    - channels: List[str] or str, channel names to invert
    - new_file: str or None, output file path (None = modify in-place)
    - verbose: bool, print operation details (default True)
    
    Returns:
    None
    """

Usage examples:

# Drop specific channels
hl.drop_channels('recording.edf', to_drop=['EMG1', 'EMG2', 'EOG'])

# Keep only EEG channels
hl.drop_channels('recording.edf', to_keep=['EEG Fp1', 'EEG Fp2', 'EEG C3', 'EEG C4'])

# Crop to specific time window (10-60 seconds).
# start/stop are keyword-only, and both formats default to "datetime", so
# second offsets need the "seconds" format stated explicitly.
hl.crop_edf(
    'long_recording.edf',
    start=10,
    stop=60,
    start_format='seconds',
    stop_format='seconds',
)

# Rename channels with mapping
channel_mapping = {
    'EEG FP1-REF': 'EEG Fp1',
    'EEG FP2-REF': 'EEG Fp2',
    'EEG C3-REF': 'EEG C3',
}
hl.rename_channels('raw.edf', channel_mapping, new_file='renamed.edf')

# Rename with new names list (no new_file given: the file is modified in-place)
new_names = ['Ch1', 'Ch2', 'Ch3', 'Ch4']
hl.rename_channels('data.edf', new_names)

# Invert polarity of specific channels (modifies the file in-place)
hl.change_polarity('eeg.edf', channels=['EEG C3', 'EEG C4'])

File Comparison and Validation

Compare two EDF files to check whether their headers and signal data are identical.

def compare_edf(edf_file1: str, edf_file2: str, verbose: bool = True) -> bool:
    """
    Compare two EDF files for differences in headers and data.
    
    Parameters:
    - edf_file1: str, path to first EDF file
    - edf_file2: str, path to second EDF file
    - verbose: bool, print detailed comparison results (default True)
    
    Returns:
    bool: True if files are identical, False otherwise
    """

Usage example:

# Check whether processing left the recording unchanged and report the verdict
are_identical = hl.compare_edf('original.edf', 'processed.edf')
print("Files are identical" if are_identical else "Files differ")

Anonymization

Remove or modify patient-identifying information in EDF files for privacy compliance.

def anonymize_edf(edf_file: str, new_file: Optional[str] = None,
                 to_remove: List[str] = ['patientname', 'birthdate'], 
                 new_values: List[str] = ['xxx', ''],
                 verify: bool = False, verbose: bool = False) -> bool:
    """
    Anonymize EDF file by replacing header fields.
    
    Parameters:
    - edf_file: str, source EDF file
    - new_file: str or None, output file path (None = auto-generated)
    - to_remove: List[str], header fields to replace
      (default ['patientname', 'birthdate'])
    - new_values: List[str], replacement values (default ['xxx', ''])
    - verify: bool, compare files after anonymization (default False)
    - verbose: bool, show progress (default False)
    
    NOTE(review): to_remove and new_values are both lists and the defaults
    have equal length, so they are presumably paired by position. When
    overriding to_remove, pass a new_values list of the same length.
    The defaults are list literals, which Python shares across calls.
    
    Returns:
    bool: True if successful
    """

Usage example:

# Remove patient identifying information.
# to_remove and new_values are parallel lists, so a custom to_remove needs
# one replacement value per field (here: blank them all).
fields_to_blank = ['patientname', 'patientcode', 'patient_additional']
hl.anonymize_edf('patient_data.edf',
                 new_file='anonymous.edf',
                 to_remove=fields_to_blank,
                 new_values=[''] * len(fields_to_blank))

# Replace with anonymous values.
# new_values takes a list, not a dict: keep the field -> value pairs in a
# dict for readability and split it into the two parallel lists on the call.
replacements = {
    'patientname': 'Anonymous',
    'patientcode': 'ANON001',
    'patient_additional': '',
    'birthdate': '1900-01-01',
}
hl.anonymize_edf('identifiable.edf',
                 new_file='deidentified.edf',
                 to_remove=list(replacements.keys()),
                 new_values=list(replacements.values()))

Utility Functions

def tqdm(iterable: Iterable, *args, **kwargs) -> Iterable:
    """
    Progress bar wrapper for long operations.
    
    Optional dependency - install with: pip install tqdm
    Falls back to standard iterator if tqdm not available, so code that
    wraps a loop with this function also works when tqdm is missing.
    
    Parameters:
    - iterable: Iterable, sequence to iterate over
    - *args, **kwargs: Additional arguments passed to tqdm
    
    Returns:
    Iterable: Progress bar iterator or standard iterator
    """

Complete High-Level Workflow Example

import pyedflib.highlevel as hl
import numpy as np
from datetime import datetime

# Step 1: Read existing file
print("Reading EDF file...")
signals, signal_headers, header = hl.read_edf('raw_recording.edf', verbose=True)

print(f"Loaded {len(signals)} channels:")
for i, hdr in enumerate(signal_headers):
    print(f"  {i}: {hdr['label']} ({hdr['sample_frequency']} Hz)")

# Step 2: Process signals (example: apply high-pass filter simulation)
print("Processing signals...")
for i, channel in enumerate(signals):
    # Simple high-pass filter simulation (remove DC offset)
    signals[i] = channel - np.mean(channel)

# Step 3: Create new file with processed data
print("Creating processed file...")
new_header = hl.make_header(
    technician='Automated Processing',
    recording_additional='High-pass filtered',
    patientname=header.get('patientname', 'Unknown'),
    patientcode=header.get('patientcode', 'UNK'),
    equipment='pyEDFlib processor',
)

# Update signal headers to reflect processing
for hdr in signal_headers:
    # strip() avoids a leading blank when there was no previous prefilter text
    hdr['prefilter'] = (hdr.get('prefilter', '') + ' HP:DC_removed').strip()

hl.write_edf('processed.edf', signals, signal_headers, new_header)

# Step 4: Crop to analysis window (30-300 seconds).
# crop_edf takes keyword-only start/stop whose formats default to
# "datetime", so the "seconds" format is stated explicitly. An explicit
# new_file makes the output path known to the next step (without it the
# path is auto-generated).
print("Cropping to analysis window...")
hl.crop_edf(
    'processed.edf',
    new_file='processed_cropped.edf',
    start=30,
    stop=300,
    start_format='seconds',
    stop_format='seconds',
)

# Step 5: Remove artifact channels from the cropped file.
# drop_channels returns the path of the file it wrote.
print("Removing artifact channels...")
reduced_file = hl.drop_channels(
    'processed_cropped.edf',
    edf_target='processed_reduced.edf',
    to_drop=['EMG', 'EOG', 'ECG'],
)

# Step 6: Anonymize for sharing.
# anonymize_edf expects two parallel lists (fields, replacement values),
# so the field -> value pairs are split on the call.
print("Creating anonymized version...")
replacements = {
    'patientname': 'Subject_001',
    'patientcode': 'S001',
    'patient_additional': 'Age_group_20-30',
    'birthdate': '1990-01-01',
}
hl.anonymize_edf(reduced_file,
                 new_file='anonymous_processed.edf',
                 to_remove=list(replacements.keys()),
                 new_values=list(replacements.values()))

# Step 7: Verify final result.
# read_edf_header returns ONE dict; the signal headers are stored under its
# 'SignalHeaders' key.
print("Verifying final file...")
final_header = hl.read_edf_header('anonymous_processed.edf')
final_headers = final_header['SignalHeaders']
print(f"Final file has {len(final_headers)} channels")
print(f"Patient name: {final_header['patientname']}")

print("Processing complete!")

Install with Tessl CLI