Sparse n-dimensional arrays for the PyData ecosystem with multiple backend implementations
—
Functions for saving, loading, and converting sparse arrays between different formats and libraries. These operations enable interoperability with NumPy, SciPy, and file-based storage systems.
Functions for persistent storage of sparse arrays using compressed formats.
def save_npz(filename, matrix, compressed=True):
"""
Save a sparse array to NumPy .npz format.
Stores one sparse array per file using an efficient sparse
representation. The array is saved with its coordinate and data
information rather than as a dense copy. To persist several arrays,
call save_npz once per array with a different file name.
Parameters:
- filename: str or file-like, output file path or file object
- matrix: sparse array to save
- compressed: bool, whether to compress the output file (default True)
Returns:
None (saves to file)
"""
def load_npz(file):
"""
Load sparse array from compressed NumPy .npz format.
Reconstructs sparse array from stored coordinate and data information.
Returns the sparse array in COO format.
Parameters:
- file: str or file-like, input file path or file object
Returns:
Sparse COO array loaded from file
"""

Functions for converting between sparse arrays and NumPy dense arrays.
def asnumpy(a):
"""
Convert sparse array to NumPy dense array.
Creates dense NumPy array representation with zeros filled in.
Equivalent to calling .todense() on sparse array.
Parameters:
- a: sparse array, input sparse array to convert
Returns:
numpy.ndarray with dense representation of sparse array
"""

Functions for converting between different data types while preserving sparsity.
def astype(a, dtype):
"""
Cast sparse array to specified data type.
Converts array elements to new data type while preserving
sparse structure. May affect precision or range of values.
Parameters:
- a: sparse array, input array to convert
- dtype: numpy dtype, target data type
Returns:
Sparse array with elements cast to new data type
"""
def can_cast(from_, to):
"""
Check if casting between data types is safe.
Determines whether values can be cast from one data type
to another without loss of precision or range.
Parameters:
- from_: numpy dtype, source data type
- to: numpy dtype, target data type
Returns:
bool, True if casting is safe, False otherwise
"""
def result_type(*arrays_and_dtypes):
"""
Determine result data type for operations on multiple arrays.
Computes the common data type that would result from operations
involving multiple arrays or data types.
Parameters:
- arrays_and_dtypes: sparse arrays or numpy dtypes
Returns:
numpy dtype, common result type for operations
"""

Functions for accessing real and imaginary components of complex arrays.
def real(a):
"""
Extract real part of complex sparse array.
For real arrays, returns copy of original array.
For complex arrays, returns sparse array containing only real components.
Parameters:
- a: sparse array, input array (real or complex)
Returns:
Sparse array containing real parts of input elements
"""
def imag(a):
"""
Extract imaginary part of complex sparse array.
For real arrays, returns sparse array of zeros with same shape.
For complex arrays, returns sparse array containing only imaginary components.
Parameters:
- a: sparse array, input array (real or complex)
Returns:
Sparse array containing imaginary parts of input elements
"""

Functions for testing array properties and validity.
def isfinite(x):
"""
Test element-wise for finite values (not inf or NaN).
Parameters:
- x: sparse array, input array to test
Returns:
Sparse boolean array, True where elements are finite
"""
def isinf(x):
"""
Test element-wise for positive or negative infinity.
Parameters:
- x: sparse array, input array to test
Returns:
Sparse boolean array, True where elements are infinite
"""
def isnan(x):
"""
Test element-wise for NaN (Not a Number) values.
Parameters:
- x: sparse array, input array to test
Returns:
Sparse boolean array, True where elements are NaN
"""
def isneginf(x):
"""
Test element-wise for negative infinity.
Parameters:
- x: sparse array, input array to test
Returns:
Sparse boolean array, True where elements are negative infinity
"""
def isposinf(x):
"""
Test element-wise for positive infinity.
Parameters:
- x: sparse array, input array to test
Returns:
Sparse boolean array, True where elements are positive infinity
"""

Advanced utility functions for element-wise operations.
def elemwise(func, *args, **kwargs):
"""
Apply arbitrary function element-wise to sparse arrays.
Applies custom function to corresponding elements of input arrays.
Function should handle scalar inputs and return scalar outputs.
Parameters:
- func: callable, function to apply element-wise
- args: sparse arrays, input arrays for function
- kwargs: additional keyword arguments for function
Returns:
Sparse array with function applied element-wise
"""

import sparse
import numpy as np
import tempfile
import os
# Create test sparse arrays
array1 = sparse.COO.from_numpy(np.array([[1, 0, 3], [0, 2, 0]]))
array2 = sparse.random((5, 5), density=0.3)
array3 = sparse.eye(4)

# Work inside a temporary directory so every file written below is removed
# automatically, even if saving or loading raises.
with tempfile.TemporaryDirectory() as tmp_dir:
    # Save single array
    single_path = os.path.join(tmp_dir, "array1.npz")
    sparse.save_npz(single_path, array1)

    # Load single array
    loaded_array1 = sparse.load_npz(single_path)
    print(f"Arrays equal: {np.array_equal(array1.todense(), loaded_array1.todense())}")

    # Save multiple arrays with names. save_npz writes exactly one array per
    # file, so each named array gets its own .npz file.
    named_arrays = {
        "main_array": array1,
        "random_array": array2,
        "identity": array3,
    }
    for name, array in named_arrays.items():
        sparse.save_npz(os.path.join(tmp_dir, f"{name}.npz"), array)

    # Load every file back into a dictionary keyed by array name
    loaded_data = {
        name: sparse.load_npz(os.path.join(tmp_dir, f"{name}.npz"))
        for name in named_arrays
    }
    print(f"Loaded arrays: {list(loaded_data.keys())}")

print("File I/O preserves sparsity and data integrity")
# Create sparse array
dense_source = np.array([[1, 0, 3, 0], [0, 2, 0, 4], [5, 0, 0, 6]])
sparse_array = sparse.COO.from_numpy(dense_source)

# Two equivalent routes from a sparse array to a dense NumPy array
dense_array = sparse.asnumpy(sparse_array)
dense_array_alt = sparse_array.todense()  # Method form of the same conversion
print(f"Sparse array nnz: {sparse_array.nnz}")
print(f"Dense array shape: {dense_array.shape}")
print(f"Arrays identical: {np.array_equal(dense_array, dense_array_alt)}")

# Memory usage comparison: sparse storage is the value buffer plus the
# coordinate buffer; dense storage is the full ndarray buffer.
sparse_buffers = (sparse_array.data, sparse_array.coords)
sparse_memory = sum(buffer.nbytes for buffer in sparse_buffers)
dense_memory = dense_array.nbytes
print(f"Sparse memory: {sparse_memory} bytes")
print(f"Dense memory: {dense_memory} bytes")
print(f"Compression ratio: {dense_memory / sparse_memory:.1f}x")
# Create arrays with different data types
int_values = np.array([[1, 0, 3], [0, 2, 0]], dtype=np.int32)
int_array = sparse.COO.from_numpy(int_values)

# Widen step by step: int32 -> float64 -> complex128
float_array = sparse.astype(int_array, np.float64)
complex_array = sparse.astype(float_array, np.complex128)
print(f"Original dtype: {int_array.dtype}")
print(f"Float dtype: {float_array.dtype}")
print(f"Complex dtype: {complex_array.dtype}")

# Check casting safety in both directions
safe_int_to_float = sparse.can_cast(np.int32, np.float64)
print(f"Safe int32 -> float64: {safe_int_to_float}")  # True
unsafe_float_to_int = sparse.can_cast(np.float64, np.int32)
print(f"Safe float64 -> int32: {unsafe_float_to_int}")  # False

# Determine the dtype an operation mixing both arrays would produce
result_type = sparse.result_type(int_array, float_array)
print(f"Result type for int32 + float64: {result_type}")  # float64
# Create complex sparse array
real_part = sparse.random((3, 4), density=0.5)
imag_part = sparse.random((3, 4), density=0.3)
complex_array = real_part + imag_part * 1j
print(f"Complex array dtype: {complex_array.dtype}")
print(f"Complex array nnz: {complex_array.nnz}")

# Split the complex array back into its two components
real_component = sparse.real(complex_array)
imag_component = sparse.imag(complex_array)
print(f"Real component nnz: {real_component.nnz}")
print(f"Imaginary component nnz: {imag_component.nnz}")

# Recombining the components should give back the original values
reconstructed = real_component + imag_component * 1j
print(f"Reconstruction accurate: {np.allclose(complex_array.todense(), reconstructed.todense())}")

# The same functions applied to a purely real array
real_array = sparse.random((2, 3), density=0.4)
real_from_real = sparse.real(real_array)  # Same values as the input
imag_from_real = sparse.imag(real_array)  # All zeros
print(f"Real from real equal: {np.array_equal(real_array.todense(), real_from_real.todense())}")
print(f"Imag from real nnz: {imag_from_real.nnz}")  # Should be 0
# Create array with special values
first_row = [1.0, np.inf, 3.0]
second_row = [0.0, np.nan, -np.inf]
test_data = np.array([first_row, second_row])
special_array = sparse.COO.from_numpy(test_data)

# Build one boolean mask per kind of special value
finite_mask = sparse.isfinite(special_array)
inf_mask = sparse.isinf(special_array)
nan_mask = sparse.isnan(special_array)
neginf_mask = sparse.isneginf(special_array)
posinf_mask = sparse.isposinf(special_array)

# Summing a dense boolean mask counts its True entries
print("Special value detection:")
print(f"Finite values: {np.sum(finite_mask.todense())}")  # Count finite
print(f"Infinite values: {np.sum(inf_mask.todense())}")  # Count inf
print(f"NaN values: {np.sum(nan_mask.todense())}")  # Count NaN
print(f"Negative inf: {np.sum(neginf_mask.todense())}")  # Count -inf
print(f"Positive inf: {np.sum(posinf_mask.todense())}")  # Count +inf

# Keep finite entries and replace everything else with 0
finite_only = sparse.where(finite_mask, special_array, 0)
print(f"Finite-only array nnz: {finite_only.nnz}")
# Define custom functions
def sigmoid(x):
    """Sigmoid activation function.

    Computes 1 / (1 + exp(-x)) element-wise for a scalar or NumPy array.

    The value is evaluated as exp(-log(1 + exp(-x))) through np.logaddexp,
    so large negative inputs do not overflow in exp(-x); they smoothly
    approach 0 instead of triggering an overflow warning.
    """
    # logaddexp(0, -x) == log(exp(0) + exp(-x)) == log(1 + exp(-x))
    return np.exp(-np.logaddexp(0, -x))
def custom_transform(x, scale=1.0, offset=0.0):
    """Squash x with tanh, then stretch by scale and shift by offset."""
    squashed = np.tanh(x)
    return squashed * scale + offset
# Run the custom functions above over a random sparse input
input_array = sparse.random((10, 10), density=0.2)

# Plain call: the function receives only the array values
sigmoid_result = sparse.elemwise(sigmoid, input_array)
print(f"Sigmoid result nnz: {sigmoid_result.nnz}")

# Extra keyword arguments are forwarded to the function
transform_options = {"scale": 2.0, "offset": 0.5}
transformed = sparse.elemwise(custom_transform, input_array, **transform_options)
print(f"Transformed result nnz: {transformed.nnz}")
# Multi-argument custom function
def weighted_sum(x, y, w1=0.5, w2=0.5):
    """Combine x and y linearly, weighting x by w1 and y by w2."""
    first_term = w1 * x
    second_term = w2 * y
    return first_term + second_term
# Two independent random operands of the same shape
array_a, array_b = (sparse.random((5, 5), density=0.3) for _ in range(2))
weights = {"w1": 0.7, "w2": 0.3}
weighted_result = sparse.elemwise(weighted_sum, array_a, array_b, **weights)
print(f"Weighted sum result nnz: {weighted_result.nnz}")
# Batch conversion and type management
from itertools import combinations

arrays = [sparse.random((20, 20), density=0.05) for _ in range(5)]

# Convert all to same type
common_dtype = np.float32
converted_arrays = [sparse.astype(arr, common_dtype) for arr in arrays]

# Verify type consistency
dtypes = [arr.dtype for arr in converted_arrays]
print(f"All same dtype: {all(dt == common_dtype for dt in dtypes)}")

# Memory usage comparison (value buffer plus coordinate buffer per array)
original_memory = sum(arr.data.nbytes + arr.coords.nbytes for arr in arrays)
converted_memory = sum(arr.data.nbytes + arr.coords.nbytes for arr in converted_arrays)
print(f"Memory change: {converted_memory / original_memory:.2f}x")

# Result type prediction for operations: combinations() yields every
# unordered pair (i < j) once, in the same order as nested index loops.
result_types = [
    sparse.result_type(first, second)
    for first, second in combinations(arrays, 2)
]
print(f"Operation result types: {set(str(rt) for rt in result_types)}")
# SciPy interoperability: convert directly between sparse formats, without
# materializing a dense array in between
try:
    from scipy import sparse as sp
except ImportError:
    print("SciPy not available for integration example")
else:
    # Create sparse array
    sparse_coo = sparse.random((100, 100), density=0.02)

    # Convert to a SciPy sparse matrix, then to CSR layout
    scipy_csr = sp.csr_matrix(sparse_coo.to_scipy_sparse())

    # Convert back to a sparse COO array straight from the SciPy matrix
    back_to_sparse = sparse.COO.from_scipy_sparse(scipy_csr)

    print(f"Round-trip successful: {np.allclose(sparse_coo.todense(), back_to_sparse.todense())}")
    print(f"SciPy CSR nnz: {scipy_csr.nnz}")
    print(f"Sparse COO nnz: {sparse_coo.nnz}")
# Efficient I/O for large arrays
large_array = sparse.random((10000, 10000), density=0.001)
print(f"Large array: {large_array.shape}, nnz: {large_array.nnz}")

# File I/O timing
import time

# Reserve a unique path, then close the handle before writing to it: a
# still-open NamedTemporaryFile cannot be reopened by name on every platform.
with tempfile.NamedTemporaryFile(suffix='.npz', delete=False) as f:
    npz_path = f.name

try:
    # perf_counter() is a monotonic, high-resolution clock meant for intervals
    start_time = time.perf_counter()
    sparse.save_npz(npz_path, large_array)
    save_time = time.perf_counter() - start_time

    start_time = time.perf_counter()
    loaded = sparse.load_npz(npz_path)
    load_time = time.perf_counter() - start_time

    print(f"Save time: {save_time:.3f}s")
    print(f"Load time: {load_time:.3f}s")

    # Both the coordinates and the stored values must survive the round trip
    coords_match = np.array_equal(large_array.coords, loaded.coords)
    data_match = np.array_equal(large_array.data, loaded.data)
    print(f"Data integrity: {coords_match and data_match}")
finally:
    # Remove the file even if saving or loading failed
    os.unlink(npz_path)

# Type conversion efficiency
start_time = time.perf_counter()
converted = sparse.astype(large_array, np.float32)
conversion_time = time.perf_counter() - start_time
print(f"Type conversion time: {conversion_time:.3f}s")

- asnumpy() for NumPy compatibility when a dense representation is acceptable
- Functions passed to elemwise() should handle scalar inputs and outputs
- can_cast() prevents unexpected precision loss

Install with Tessl CLI
npx tessl i tessl/pypi-sparse