Sparse n-dimensional arrays for the PyData ecosystem with multiple backend implementations
—
This section describes the fundamental sparse array classes in the sparse library. Each class provides a different storage strategy, with performance characteristics suited to a particular sparse data pattern.
SparseArray is the abstract base class that defines the common interface for all sparse array implementations.
class SparseArray:
    """
    Abstract base class for sparse arrays.

    Provides common methods and properties shared by all sparse array formats.

    Parameters:
    - shape: tuple, shape of the array
    - fill_value: default value for unspecified entries
    """

    def __init__(self, shape, fill_value=None): ...

    # --- Shape and storage metadata ---
    @property
    def shape(self): ...

    @property
    def ndim(self): ...

    @property
    def size(self): ...

    @property
    def nnz(self): ...  # Number of stored (non-zero) values

    @property
    def density(self): ...  # Fraction of non-zero elements

    @property
    def dtype(self): ...

    @property
    def device(self): ...  # Always returns "cpu"

    # --- Derived views ---
    @property
    def T(self): ...  # Transpose

    @property
    def real(self): ...  # Real part

    @property
    def imag(self): ...  # Imaginary part

    # --- Conversion ---
    def astype(self, dtype, casting="unsafe", copy=True): ...

    def todense(self): ...

    def to_device(self, device, /, *, stream=None): ...

    # --- Reductions ---
    def reduce(self, method, axis=(0,), keepdims=False, **kwargs): ...

    def sum(self, axis=None, keepdims=False, dtype=None, out=None): ...

    def max(self, axis=None, keepdims=False, out=None): ...

    def min(self, axis=None, keepdims=False, out=None): ...

    def mean(self, axis=None, keepdims=False, dtype=None, out=None): ...

    def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...

    def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...

    # --- Rounding, clipping and value tests ---
    def round(self, decimals=0, out=None): ...

    def clip(self, min=None, max=None, out=None): ...

    def conj(self): ...

    def isinf(self): ...

    def isnan(self): ...


# The primary sparse array format using coordinate lists to store non-zero elements.
# Efficient for element-wise operations and construction from scattered data.
class COO(SparseArray):
    """
    Coordinate format sparse array.

    Stores sparse arrays using coordinate lists for indices and corresponding values.
    Most general format - efficient for element-wise operations and arithmetic.

    Parameters:
    - coords: array-like, coordinates of non-zero elements
    - data: array-like, values at coordinate positions
    - shape: tuple, shape of the array
    - has_duplicates: bool, whether coordinates contain duplicates (default True)
    - sorted: bool, whether coordinates are sorted (default False)
    - prune: bool, whether to remove explicit zeros (default False)
    - cache: bool, whether to enable operation caching (default False);
      see also enable_caching()
    - fill_value: default value for unspecified entries
    - idx_dtype: presumably the data type used for the coordinate (index)
      arrays - confirm against the upstream documentation
    """

    def __init__(self, coords, data=None, shape=None, has_duplicates=True, sorted=False, prune=False, cache=False, fill_value=None, idx_dtype=None): ...

    # --- Alternate constructors ---
    @classmethod
    def from_numpy(cls, x, fill_value=None, idx_dtype=None):
        """Create COO array from dense NumPy array"""

    @classmethod
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create COO array from SciPy sparse matrix"""

    @classmethod
    def from_iter(cls, x, shape, fill_value=None, dtype=None):
        """Create COO array from iterable"""

    # --- Properties ---
    @property
    def coords(self): ...  # Coordinate arrays

    @property
    def data(self): ...  # Value array

    @property
    def format(self): ...  # Returns "coo"

    @property
    def nbytes(self): ...  # Memory usage in bytes

    @property
    def T(self): ...  # Transpose property

    @property
    def mT(self): ...  # Matrix transpose (last 2 dims)

    # --- Methods ---
    def todense(self):
        """Convert to dense NumPy array"""

    def copy(self, deep=True):
        """Create copy of array"""

    def enable_caching(self):
        """Enable operation caching"""

    def transpose(self, axes=None):
        """Return transposed array"""

    def swapaxes(self, axis1, axis2):
        """Swap two axes"""

    def dot(self, other):
        """Dot product with another array"""

    def linear_loc(self):
        """Flattened nonzero coordinates"""

    def flatten(self, order="C"):
        """Flatten array"""

    def reshape(self, shape, order="C"):
        """Return reshaped array"""

    def squeeze(self, axis=None):
        """Remove singleton dimensions"""

    def to_scipy_sparse(self, /, *, accept_fv=None):
        """Convert to SciPy sparse matrix"""

    def tocsr(self):
        """Convert to CSR format"""

    def tocsc(self):
        """Convert to CSC format"""

    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""

    def broadcast_to(self, shape):
        """Broadcast to shape"""

    def maybe_densify(self, max_size=1000, min_density=0.25):
        """Conditional densification"""

    def nonzero(self):
        """Get nonzero indices"""


# Dictionary-based sparse array format optimized for incremental construction
# and random access patterns.
class DOK(SparseArray):
    """
    Dictionary of Keys format sparse array.

    Uses dictionary to map coordinate tuples to values. Efficient for:
    - Incremental construction (adding elements one by one)
    - Random access and modification
    - Building sparse arrays with unknown sparsity patterns

    Parameters:
    - shape: tuple, shape of the array
    - data: dict, mapping from coordinate tuples to values
    - dtype: data type for values
    - fill_value: default value for unspecified entries
    """

    def __init__(self, shape, data=None, dtype=None, fill_value=None): ...

    # --- Alternate constructors ---
    @classmethod
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create DOK array from SciPy sparse matrix"""

    @classmethod
    def from_coo(cls, x):
        """Create DOK array from COO array"""

    @classmethod
    def from_numpy(cls, x):
        """Create DOK array from NumPy array"""

    # --- Properties ---
    @property
    def format(self): ...  # Returns "dok"

    @property
    def nbytes(self): ...  # Memory usage in bytes

    # --- Conversion and reshaping ---
    # Note the spelling: DOK converts with to_coo(), with an underscore.
    def to_coo(self):
        """Convert to COO format"""

    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""

    def reshape(self, shape, order="C"):
        """Return reshaped array"""

    # --- Element access ---
    def __getitem__(self, key):
        """Get item with fancy indexing support"""

    def __setitem__(self, key, value):
        """Set item with fancy indexing support"""


# Generalized compressed sparse format that encompasses CSR (Compressed Sparse Row)
# and CSC (Compressed Sparse Column) formats for memory-efficient storage.
class GCXS(SparseArray):
    """
    Generalized Compressed Sparse format.

    Compressed storage format that generalizes CSR/CSC to arbitrary dimensions.
    Memory-efficient for:
    - Matrix-vector operations
    - Row or column-wise access patterns
    - Large sparse matrices with structured sparsity

    Parameters:
    - arg: input data (COO array, tuple of arrays, or existing GCXS)
    - shape: tuple, shape of the array
    - compressed_axes: tuple, axes to compress
    - prune: bool, whether to remove explicit zeros (default False)
    - fill_value: default value for unspecified entries
    - idx_dtype: presumably the data type used for the index arrays -
      confirm against the upstream documentation

    Note: the constructor shown here takes no ``format`` argument. CSR and CSC
    are the 2-D cases this format generalizes; the ``format`` property
    returns "gcxs".
    """

    def __init__(self, arg, shape=None, compressed_axes=None, prune=False, fill_value=None, idx_dtype=None): ...

    # --- Alternate constructors ---
    @classmethod
    def from_numpy(cls, x, compressed_axes=None, fill_value=None, idx_dtype=None):
        """Create GCXS from NumPy array"""

    @classmethod
    def from_coo(cls, x, compressed_axes=None, idx_dtype=None):
        """Create GCXS from COO array"""

    @classmethod
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create GCXS from SciPy sparse matrix"""

    @classmethod
    def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None, idx_dtype=None):
        """Create GCXS from iterable"""

    # --- Properties ---
    @property
    def format(self): ...  # Returns "gcxs"

    @property
    def nbytes(self): ...  # Memory usage in bytes

    @property
    def compressed_axes(self): ...  # Compressed axis dimensions

    @property
    def T(self): ...  # Transpose property

    @property
    def mT(self): ...  # Matrix transpose (last 2 dims)

    # --- Methods ---
    def copy(self, deep=True):
        """Create copy of array"""

    def change_compressed_axes(self, new_compressed_axes):
        """Change compression axes"""

    # Note the spelling: GCXS converts with tocoo()/todok(), no underscore.
    def tocoo(self):
        """Convert to COO format"""

    def todok(self):
        """Convert to DOK format"""

    def to_scipy_sparse(self, accept_fv=None):
        """Convert to SciPy sparse matrix"""

    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""

    def maybe_densify(self, max_size=1000, min_density=0.25):
        """Conditional densification"""

    def flatten(self, order="C"):
        """Flatten array"""

    def reshape(self, shape, order="C", compressed_axes=None):
        """Reshape array"""

    def transpose(self, axes=None, compressed_axes=None):
        """Return transposed array"""

    def dot(self, other):
        """Dot product with another array"""


# Usage example (continues below).
import sparse
import numpy as np
# Create COO from coordinates and data
coords = [[0, 1, 2], [0, 2, 1]]  # row, col indices
data = [1.0, 2.0, 3.0]  # values
shape = (3, 3)
coo_array = sparse.COO(coords, data, shape)

# Create COO from dense array
dense = np.array([[1, 0, 0], [0, 2, 0], [0, 3, 0]])
coo_from_dense = sparse.COO.from_numpy(dense)

# Create DOK for incremental construction
dok_array = sparse.DOK((100, 100), dtype=float)
dok_array[10, 20] = 5.0
dok_array[50, 80] = -2.5

# Convert DOK to COO for operations.
# DOK exposes to_coo() (with an underscore); tocoo() is the GCXS spelling.
coo_from_dok = dok_array.to_coo()

# Inspect basic properties of the 3x3 COO array built above
print(f"Shape: {coo_array.shape}")  # (3, 3)
print(f"Non-zeros: {coo_array.nnz}")  # 3
print(f"Density: {coo_array.density:.1%}")  # 33.3%
print(f"Data type: {coo_array.dtype}")  # float64

# Convert between formats
dense_result = coo_array.todense()  # To NumPy array
gcxs_array = sparse.GCXS.from_coo(coo_array)  # To compressed format
# Further conversions: .tocoo(), .todense(), etc.

# Install with Tessl CLI:
#   npx tessl i tessl/pypi-sparse