CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-sparse

Sparse n-dimensional arrays for the PyData ecosystem with multiple backend implementations

Pending
Overview
Eval results
Files

docs/core-arrays.md

Core Array Classes

The fundamental sparse array classes in the sparse library, each providing different storage strategies and performance characteristics for various sparse data patterns.

Capabilities

SparseArray Base Class

Abstract base class that defines the common interface for all sparse array implementations.

class SparseArray:
    """
    Abstract base class for sparse arrays.
    
    Provides common methods and properties shared by all sparse array formats.

    This listing is an interface summary: every body is elided with ``...``.
    Signature details worth noticing are called out in the comments below.
    """
    def __init__(self, shape, fill_value=None): ...
    
    # --- Array metadata (read-only properties) ---
    @property
    def shape(self): ...
    @property  
    def ndim(self): ...
    @property
    def size(self): ...
    @property
    def nnz(self): ...  # Number of stored (non-zero) values
    @property
    def density(self): ...  # Fraction of non-zero elements
    @property
    def dtype(self): ...
    @property
    def device(self): ...  # Always returns "cpu"
    @property
    def T(self): ...  # Transpose
    @property
    def real(self): ...  # Real part
    @property
    def imag(self): ...  # Imaginary part
    
    # --- Conversion ---
    def astype(self, dtype, casting="unsafe", copy=True): ...
    def todense(self): ...
    # `device` is positional-only and `stream` is keyword-only.
    def to_device(self, device, /, *, stream=None): ...

    # --- Reductions ---
    # Note: `reduce` defaults to axis=(0,), whereas the named reductions
    # below default to axis=None.
    def reduce(self, method, axis=(0,), keepdims=False, **kwargs): ...
    def sum(self, axis=None, keepdims=False, dtype=None, out=None): ...
    def max(self, axis=None, keepdims=False, out=None): ...
    def min(self, axis=None, keepdims=False, out=None): ...
    def mean(self, axis=None, keepdims=False, dtype=None, out=None): ...
    # Note: `var` and `std` order their parameters differently from
    # `sum`/`mean` (`keepdims` comes last) -- prefer keyword arguments.
    def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...
    def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...

    # --- Element-wise operations ---
    def round(self, decimals=0, out=None): ...
    def clip(self, min=None, max=None, out=None): ...
    def conj(self): ...
    def isinf(self): ...
    def isnan(self): ...

COO (Coordinate Format)

The primary sparse array format using coordinate lists to store non-zero elements. Efficient for element-wise operations and construction from scattered data.

class COO(SparseArray):
    """
    Coordinate format sparse array.
    
    Stores sparse arrays using coordinate lists for indices and corresponding values.
    Most general format - efficient for element-wise operations and arithmetic.
    
    Parameters:
    - coords: array-like, coordinates of non-zero elements  
    - data: array-like, values at coordinate positions (default None)
    - shape: tuple, shape of the array (default None)
    - has_duplicates: bool, whether coordinates contain duplicates (default True)
    - sorted: bool, whether coordinates are sorted (default False)
    - prune: bool, whether to remove explicit zeros (default False)
    - cache: bool, whether to enable operation caching (default False);
      see also enable_caching()
    - fill_value: value assumed for entries that are not stored (default None)
      -- NOTE(review): how None is resolved is not shown here; confirm
      against the library documentation
    - idx_dtype: presumably the dtype used for the coordinate arrays
      (default None) -- TODO confirm
    """
    
    def __init__(self, coords, data=None, shape=None, has_duplicates=True, sorted=False, prune=False, cache=False, fill_value=None, idx_dtype=None): ...
    
    @classmethod
    def from_numpy(cls, x, fill_value=None, idx_dtype=None):
        """Create COO array from dense NumPy array"""
    
    @classmethod  
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create COO array from SciPy sparse matrix

        ``x`` is positional-only; ``fill_value`` is keyword-only.
        """
    
    @classmethod
    def from_iter(cls, x, shape, fill_value=None, dtype=None):
        """Create COO array from iterable"""
    
    @property
    def coords(self): ...  # Coordinate arrays
    @property  
    def data(self): ...    # Value array
    @property
    def format(self): ...  # Returns "coo"
    @property
    def nbytes(self): ...  # Memory usage in bytes
    @property
    def T(self): ...  # Transpose property
    @property
    def mT(self): ...  # Matrix transpose (last 2 dims)
    
    def todense(self):
        """Convert to dense NumPy array"""
    
    def copy(self, deep=True):
        """Create copy of array"""
    
    def enable_caching(self):
        """Enable operation caching"""
    
    def transpose(self, axes=None):
        """Return transposed array"""
    
    def swapaxes(self, axis1, axis2):
        """Swap two axes"""
    
    def dot(self, other):
        """Dot product with another array"""
    
    def linear_loc(self):
        """Flattened nonzero coordinates"""
    
    def flatten(self, order="C"):
        """Flatten array"""
    
    def reshape(self, shape, order="C"):
        """Return reshaped array"""
    
    def squeeze(self, axis=None):
        """Remove singleton dimensions"""
    
    def to_scipy_sparse(self, /, *, accept_fv=None):
        """Convert to SciPy sparse matrix

        ``accept_fv`` is keyword-only in this signature.
        """
    
    def tocsr(self):
        """Convert to CSR format"""
    
    def tocsc(self):
        """Convert to CSC format"""
    
    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""
    
    def broadcast_to(self, shape):
        """Broadcast to shape"""
    
    def maybe_densify(self, max_size=1000, min_density=0.25):
        """Conditional densification

        Presumably densifies only when the array is small enough
        (``max_size``) or dense enough (``min_density``) -- confirm the
        exact rule and the failure behaviour in the library documentation.
        """
    
    def nonzero(self):
        """Get nonzero indices"""

DOK (Dictionary of Keys)

Dictionary-based sparse array format optimized for incremental construction and random access patterns.

class DOK(SparseArray):
    """
    Dictionary of Keys format sparse array.
    
    Uses dictionary to map coordinate tuples to values. Efficient for:
    - Incremental construction (adding elements one by one)
    - Random access and modification
    - Building sparse arrays with unknown sparsity patterns
    
    Parameters:
    - shape: tuple, shape of the array
    - data: dict, mapping from coordinate tuples to values (default None)
    - dtype: data type for values (default None)
    - fill_value: default value for unspecified entries (default None)
    """
    
    def __init__(self, shape, data=None, dtype=None, fill_value=None): ...
    
    @classmethod
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create DOK array from SciPy sparse matrix

        ``x`` is positional-only; ``fill_value`` is keyword-only.
        """
    
    @classmethod
    def from_coo(cls, x):
        """Create DOK array from COO array"""
    
    @classmethod
    def from_numpy(cls, x):
        """Create DOK array from NumPy array"""
    
    @property
    def format(self): ...  # Returns "dok"
    @property
    def nbytes(self): ...  # Memory usage in bytes
    
    def to_coo(self):
        """Convert to COO format

        Note the spelling: DOK uses ``to_coo`` (with an underscore), while
        GCXS names the equivalent method ``tocoo``.
        """
    
    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""
    
    def reshape(self, shape, order="C"):
        """Return reshaped array"""
    
    def __getitem__(self, key):
        """Get item with fancy indexing support"""
    
    def __setitem__(self, key, value):
        """Set item with fancy indexing support"""

GCXS (Generalized Compressed Sparse)

Generalized compressed sparse format that encompasses CSR (Compressed Sparse Row) and CSC (Compressed Sparse Column) formats for memory-efficient storage.

class GCXS(SparseArray):
    """
    Generalized Compressed Sparse format.
    
    Compressed storage format that generalizes CSR/CSC to arbitrary dimensions.
    Memory-efficient for:
    - Matrix-vector operations
    - Row or column-wise access patterns  
    - Large sparse matrices with structured sparsity
    
    Parameters:
    - arg: input data (COO array, tuple of arrays, or existing GCXS)
    - shape: tuple, shape of the array (default None)
    - compressed_axes: tuple, axes to compress (default None). The
      constructor has no separate ``format`` argument; for 2D arrays the
      CSR/CSC choice is presumably expressed through this parameter
      (compressing the row axis vs. the column axis) -- TODO confirm the
      exact values against the library documentation
    - prune: bool, presumably whether to remove explicit zeros, as for COO
      (default False) -- TODO confirm
    - fill_value: value assumed for entries that are not stored
      (default None)
    - idx_dtype: presumably the dtype used for the index arrays
      (default None) -- TODO confirm
    """
    
    def __init__(self, arg, shape=None, compressed_axes=None, prune=False, fill_value=None, idx_dtype=None): ...
    
    @classmethod
    def from_numpy(cls, x, compressed_axes=None, fill_value=None, idx_dtype=None):
        """Create GCXS from NumPy array"""
    
    @classmethod
    def from_coo(cls, x, compressed_axes=None, idx_dtype=None):
        """Create GCXS from COO array"""
    
    @classmethod
    def from_scipy_sparse(cls, x, /, *, fill_value=None):
        """Create GCXS from SciPy sparse matrix

        ``x`` is positional-only; ``fill_value`` is keyword-only.
        """
    
    @classmethod
    def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None, idx_dtype=None):
        """Create GCXS from iterable"""
    
    @property
    def format(self): ...  # Returns "gcxs"
    @property
    def nbytes(self): ...  # Memory usage in bytes
    @property
    def compressed_axes(self): ...  # Compressed axis dimensions
    @property
    def T(self): ...  # Transpose property
    @property
    def mT(self): ...  # Matrix transpose (last 2 dims)
    
    def copy(self, deep=True):
        """Create copy of array"""
    
    def change_compressed_axes(self, new_compressed_axes):
        """Change compression axes"""
    
    def tocoo(self):
        """Convert to COO format

        Note the spelling: GCXS uses ``tocoo``, while DOK names the
        equivalent method ``to_coo``.
        """
    
    def todok(self):
        """Convert to DOK format"""
    
    def to_scipy_sparse(self, accept_fv=None):
        """Convert to SciPy sparse matrix

        Unlike ``COO.to_scipy_sparse``, ``accept_fv`` is not keyword-only
        in this signature.
        """
    
    def asformat(self, format, **kwargs):
        """Convert to different sparse format"""
    
    def maybe_densify(self, max_size=1000, min_density=0.25):
        """Conditional densification"""
    
    def flatten(self, order="C"):
        """Flatten array"""
    
    def reshape(self, shape, order="C", compressed_axes=None):
        """Reshape array

        ``compressed_axes`` optionally selects the compressed axes of the
        result (default None).
        """
    
    def transpose(self, axes=None, compressed_axes=None):
        """Return transposed array

        ``compressed_axes`` optionally selects the compressed axes of the
        result (default None).
        """
    
    def dot(self, other):
        """Dot product with another array"""

Usage Examples

Creating Sparse Arrays

import sparse
import numpy as np

# Create COO from coordinates and data.
# coords[0] holds the row indices and coords[1] the column indices, so the
# stored entries are (0, 0) -> 1.0, (1, 2) -> 2.0 and (2, 1) -> 3.0.
coords = [[0, 1, 2], [0, 2, 1]]  # row, col indices
data = [1.0, 2.0, 3.0]           # values
shape = (3, 3)
coo_array = sparse.COO(coords, data, shape)

# Create COO from dense array
dense = np.array([[1, 0, 0], [0, 2, 0], [0, 3, 0]])
coo_from_dense = sparse.COO.from_numpy(dense)

# Create DOK for incremental construction
dok_array = sparse.DOK((100, 100), dtype=float)
dok_array[10, 20] = 5.0
dok_array[50, 80] = -2.5

# Convert DOK to COO for operations.
# DOK spells this method `to_coo` (with an underscore); `tocoo` exists on
# GCXS only, so calling `dok_array.tocoo()` would raise AttributeError.
coo_from_dok = dok_array.to_coo()

Array Properties and Conversion

# Continues from the "Creating Sparse Arrays" example: `coo_array` is the
# 3x3 COO array built there from three (row, col, value) entries.
print(f"Shape: {coo_array.shape}")           # (3, 3)
print(f"Non-zeros: {coo_array.nnz}")         # 3  
print(f"Density: {coo_array.density:.1%}")   # 33.3% (3 stored values out of 9 elements)
print(f"Data type: {coo_array.dtype}")       # float64

# Convert between formats
dense_result = coo_array.todense()           # To NumPy array
gcxs_array = sparse.GCXS.from_coo(coo_array) # To compressed format

Format Selection Guidelines

  • COO: General-purpose format, best for arithmetic operations and initial array creation
  • DOK: Best for incremental construction when you need to set individual elements
  • GCXS: Most memory-efficient for large arrays, optimal for linear algebra operations
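  • Conversion: Arrays can be converted between formats as needed. Every format provides .asformat() and .todense(); the direct converters are named per class: COO.tocsr() / COO.tocsc(), DOK.to_coo() (note the underscore), and GCXS.tocoo() / GCXS.todok().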

Install with Tessl CLI

npx tessl i tessl/pypi-sparse

docs

array-creation.md

array-manipulation.md

core-arrays.md

index.md

io-conversion.md

linear-algebra.md

math-operations.md

reductions.md

tile.json