A library of algorithms for the baseline correction of experimental data.
Specialized baseline correction algorithms that don't fit into standard categories. These methods use unique approaches — such as simultaneous denoising and baseline estimation, or direct interpolation between user-defined baseline points — and provide alternative solutions for specific data types and use cases.

BEADS is an advanced method that simultaneously performs baseline correction and noise reduction using spline-based optimization with multiple regularization terms.
def beads(data, freq_cutoff=0.005, lam_0=1.0, lam_1=1.0, lam_2=1.0, asymmetry=6.0, filter_type=1, cost_function=2, max_iter=50, tol=1e-2, eps_0=1e-6, eps_1=1e-6, fit_parabola=True, smooth_half_window=None, x_data=None):
    """Baseline Estimation And Denoising using Splines (BEADS).

    Simultaneously estimates the baseline and reduces noise through a
    multi-objective optimization with spline smoothing and asymmetric
    penalties.

    Parameters
    ----------
    data : array-like
        Input y-values to process for baseline estimation and denoising.
    freq_cutoff : float, optional
        Cutoff frequency of the high-pass filter; must satisfy
        0 < freq_cutoff < 0.5.
    lam_0 : float, optional
        Regularization parameter for baseline smoothness.
    lam_1 : float, optional
        Regularization parameter penalizing the first derivative.
    lam_2 : float, optional
        Regularization parameter penalizing the second derivative.
    asymmetry : float, optional
        Asymmetry parameter controlling baseline/peak separation.
    filter_type : int, optional
        Type of high-pass filter (1 or 2).
    cost_function : int, optional
        Cost function used for the optimization (1 or 2).
    max_iter : int, optional
        Maximum number of iterations for optimization convergence.
    tol : float, optional
        Convergence tolerance for the iterative fit.
    eps_0 : float, optional
        Tolerance parameter for the baseline estimation term.
    eps_1 : float, optional
        Tolerance parameter for the noise-reduction term.
    fit_parabola : bool, optional
        Whether to fit a parabolic trend to the data.
    smooth_half_window : int, optional
        Half-window size for the final smoothing step.
    x_data : array-like, optional
        Input x-values.

    Returns
    -------
    tuple
        ``(baseline, params)`` where ``params`` contains both the baseline
        and the denoised signal under the additional keys
        ``'denoised_signal'`` and ``'optimization_history'``.
    """
    # NOTE(review): documentation stub only -- the reference implementation
    # lives in pybaselines.misc.beads.  With a docstring-only body this stub
    # returns None if called directly.


# Direct baseline estimation by interpolating between user-specified baseline
# points using various interpolation methods.
def interp_pts(data, baseline_points, interp_method='linear', x_data=None):
    """Interpolate a baseline between specified baseline points.

    Creates a baseline by interpolating between manually identified or
    algorithmically determined baseline points using the specified
    interpolation method.

    Parameters
    ----------
    data : array-like
        Input y-values (used primarily for output array dimensions).
    baseline_points : array-like
        Indices or (x, y) coordinates of the baseline points.  A 1D array is
        treated as indices into ``data``; a 2D array is treated as (x, y)
        coordinate pairs.
    interp_method : str, optional
        Interpolation method for baseline construction.  Options:
        'linear', 'cubic', 'quadratic', 'nearest', 'pchip'.
    x_data : array-like, optional
        Input x-values for coordinate-based interpolation.

    Returns
    -------
    tuple
        ``(baseline, params)`` where ``params`` contains interpolation
        details under the additional keys ``'interp_method'``,
        ``'baseline_points_used'`` and ``'x_baseline_points'``.
    """
    # NOTE(review): documentation stub only -- the reference implementation
    # lives in pybaselines.misc.interp_pts.  With a docstring-only body this
    # stub returns None if called directly.


import numpy as np
# Example: BEADS performs baseline correction and denoising in one pass.
from pybaselines.misc import beads

# Sample noisy spectroscopic data with quadratic baseline drift.
x = np.linspace(0, 1000, 1500)
baseline_true = 20 + 0.03 * x + 0.00002 * x**2
peaks = (200 * np.exp(-((x - 250) / 40)**2) +
         150 * np.exp(-((x - 600) / 35)**2) +
         180 * np.exp(-((x - 850) / 45)**2))
noise = np.random.normal(0, 5, len(x))  # significant noise level
data = baseline_true + peaks + noise

baseline, params = beads(data, lam_0=0.5, lam_1=5, lam_2=4, asymmetry=0.1)
corrected = data - baseline
denoised = params['denoised_signal']  # denoised version of the original data
print(f"Baseline estimation converged in {params.get('n_iter', 'N/A')} iterations")
print(f"Noise reduction factor: {np.std(data)/np.std(denoised):.2f}")
from pybaselines.misc import interp_pts
# Define baseline points manually (could come from a peak-picking algorithm),
# using indices into the data array.
baseline_indices = [0, 150, 300, 500, 750, 1000, 1499]  # points known to be baseline
# Create the baseline by linear interpolation between those points.
baseline, params = interp_pts(data, baseline_indices, interp_method='linear')
corrected = data - baseline
print(f"Interpolated between {len(baseline_indices)} baseline points")
print(f"Used {params['interp_method']} interpolation method")
# Alternative: specify (x, y) coordinate pairs for baseline points
idx_points = np.array([0, 150, 300, 500, 750, 1000])  # indices of baseline points
y_points = data[idx_points]  # or manually determined y-values
# BUG FIX: the x-coordinate of sample i is x[i], not i itself -- x spans
# 0..1000 over 1500 samples, so x[i] ~= 0.667 * i.  Pairing data[i] with the
# raw index would shift every baseline point; use the actual x-values.
baseline_coords = np.column_stack([x[idx_points], y_points])
# Use cubic spline interpolation for a smooth baseline.
baseline, params = interp_pts(data, baseline_coords,
                              interp_method='cubic', x_data=x)
corrected = data - baseline
# Adjust BEADS parameters based on data characteristics
# Estimate the noise level from first differences of the data.
# NOTE(review): np.diff inflates the noise std by ~sqrt(2); the thresholds
# below were presumably tuned with that in mind -- confirm before changing.
noise_level = np.std(np.diff(data))
if noise_level > 5:
    # High noise: stronger denoising.
    lam_1_opt = 10
    lam_2_opt = 6
elif noise_level > 2:
    # Medium noise: balanced approach.
    lam_1_opt = 5
    lam_2_opt = 4
else:
    # Low noise: gentle denoising.
    lam_1_opt = 2
    lam_2_opt = 2
baseline, params = beads(data, lam_0=0.5, lam_1=lam_1_opt,
                         lam_2=lam_2_opt, asymmetry=0.1)
corrected = data - baseline
denoised = params['denoised_signal']
print(f"Optimized for noise level: {noise_level:.2f}")
# First pass: BEADS for general baseline and denoising
baseline_beads, params_beads = beads(data, lam_0=0.8, lam_1=3, lam_2=3)
denoised_data = params_beads['denoised_signal']
# Second pass: identify remaining baseline points in the denoised data
# (a dedicated peak-detection algorithm could be used here).
from scipy.signal import find_peaks
peaks_idx, _ = find_peaks(denoised_data - baseline_beads, height=10)
# Take the valley between each pair of adjacent peaks as a baseline point,
# skipping a 20-sample margin around each peak.
baseline_points = []
for left_peak, right_peak in zip(peaks_idx[:-1], peaks_idx[1:]):
    start_idx = left_peak + 20   # skip the left peak region
    end_idx = right_peak - 20    # skip the right peak region
    if start_idx < end_idx:
        valley_idx = start_idx + np.argmin(denoised_data[start_idx:end_idx])
        baseline_points.append(valley_idx)
# Always anchor the interpolation at both endpoints of the signal.
baseline_points = [0] + baseline_points + [len(data) - 1]
# Refine the baseline by interpolating through the detected valley points.
baseline_final, params_interp = interp_pts(denoised_data, baseline_points,
                                           interp_method='cubic')
corrected_final = denoised_data - baseline_final
print(f"Two-stage correction: BEADS + interpolation with {len(baseline_points)} points")
# Install with Tessl CLI
npx tessl i tessl/pypi-pybaselines