0
# Core Array Classes
1
2
This document describes the fundamental sparse array classes in the `sparse` library. Each class uses a different storage strategy, with performance characteristics suited to different sparse data patterns.
3
4
## Capabilities
5
6
### SparseArray Base Class
7
8
Abstract base class that defines the common interface for all sparse array implementations.
9
10
```python { .api }
11
class SparseArray:
12
"""
13
Abstract base class for sparse arrays.
14
15
Provides common methods and properties shared by all sparse array formats.
16
"""
17
def __init__(self, shape, fill_value=None): ...
18
19
@property
20
def shape(self): ...
21
@property
22
def ndim(self): ...
23
@property
24
def size(self): ...
25
@property
26
def nnz(self): ... # Number of stored (non-zero) values
27
@property
28
def density(self): ... # Fraction of non-zero elements
29
@property
30
def dtype(self): ...
31
@property
32
def device(self): ... # Always returns "cpu"
33
@property
34
def T(self): ... # Transpose
35
@property
36
def real(self): ... # Real part
37
@property
38
def imag(self): ... # Imaginary part
39
40
def astype(self, dtype, casting="unsafe", copy=True): ...
41
def todense(self): ...
42
def to_device(self, device, /, *, stream=None): ...
43
def reduce(self, method, axis=(0,), keepdims=False, **kwargs): ...
44
def sum(self, axis=None, keepdims=False, dtype=None, out=None): ...
45
def max(self, axis=None, keepdims=False, out=None): ...
46
def min(self, axis=None, keepdims=False, out=None): ...
47
def mean(self, axis=None, keepdims=False, dtype=None, out=None): ...
48
def var(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...
49
def std(self, axis=None, dtype=None, out=None, ddof=0, keepdims=False): ...
50
def round(self, decimals=0, out=None): ...
51
def clip(self, min=None, max=None, out=None): ...
52
def conj(self): ...
53
def isinf(self): ...
54
def isnan(self): ...
55
```
56
57
### COO (Coordinate Format)
58
59
COO is the primary sparse array format; it stores non-zero elements as coordinate lists. It is efficient for element-wise operations and for construction from scattered data.
60
61
```python { .api }
62
class COO(SparseArray):
63
"""
64
Coordinate format sparse array.
65
66
Stores sparse arrays using coordinate lists for indices and corresponding values.
67
Most general format - efficient for element-wise operations and arithmetic.
68
69
Parameters:
70
- coords: array-like, coordinates of non-zero elements
71
- data: array-like, values at coordinate positions
72
- shape: tuple, shape of the array
73
- has_duplicates: bool, whether coordinates contain duplicates
74
- sorted: bool, whether coordinates are sorted
75
- prune: bool, whether to remove explicit zeros
76
"""
77
78
def __init__(self, coords, data=None, shape=None, has_duplicates=True, sorted=False, prune=False, cache=False, fill_value=None, idx_dtype=None): ...
79
80
@classmethod
81
def from_numpy(cls, x, fill_value=None, idx_dtype=None):
82
"""Create COO array from dense NumPy array"""
83
84
@classmethod
85
def from_scipy_sparse(cls, x, /, *, fill_value=None):
86
"""Create COO array from SciPy sparse matrix"""
87
88
@classmethod
89
def from_iter(cls, x, shape, fill_value=None, dtype=None):
90
"""Create COO array from iterable"""
91
92
@property
93
def coords(self): ... # Coordinate arrays
94
@property
95
def data(self): ... # Value array
96
@property
97
def format(self): ... # Returns "coo"
98
@property
99
def nbytes(self): ... # Memory usage in bytes
100
@property
101
def T(self): ... # Transpose property
102
@property
103
def mT(self): ... # Matrix transpose (last 2 dims)
104
105
def todense(self):
106
"""Convert to dense NumPy array"""
107
108
def copy(self, deep=True):
109
"""Create copy of array"""
110
111
def enable_caching(self):
112
"""Enable operation caching"""
113
114
def transpose(self, axes=None):
115
"""Return transposed array"""
116
117
def swapaxes(self, axis1, axis2):
118
"""Swap two axes"""
119
120
def dot(self, other):
121
"""Dot product with another array"""
122
123
def linear_loc(self):
124
"""Flattened nonzero coordinates"""
125
126
def flatten(self, order="C"):
127
"""Flatten array"""
128
129
def reshape(self, shape, order="C"):
130
"""Return reshaped array"""
131
132
def squeeze(self, axis=None):
133
"""Remove singleton dimensions"""
134
135
def to_scipy_sparse(self, /, *, accept_fv=None):
136
"""Convert to SciPy sparse matrix"""
137
138
def tocsr(self):
139
"""Convert to CSR format"""
140
141
def tocsc(self):
142
"""Convert to CSC format"""
143
144
def asformat(self, format, **kwargs):
145
"""Convert to different sparse format"""
146
147
def broadcast_to(self, shape):
148
"""Broadcast to shape"""
149
150
def maybe_densify(self, max_size=1000, min_density=0.25):
151
"""Conditional densification"""
152
153
def nonzero(self):
154
"""Get nonzero indices"""
155
```
156
157
### DOK (Dictionary of Keys)
158
159
Dictionary-based sparse array format optimized for incremental construction and random access patterns.
160
161
```python { .api }
162
class DOK(SparseArray):
163
"""
164
Dictionary of Keys format sparse array.
165
166
Uses dictionary to map coordinate tuples to values. Efficient for:
167
- Incremental construction (adding elements one by one)
168
- Random access and modification
169
- Building sparse arrays with unknown sparsity patterns
170
171
Parameters:
172
- shape: tuple, shape of the array
173
- data: dict, mapping from coordinate tuples to values
174
- dtype: data type for values
175
- fill_value: default value for unspecified entries
176
"""
177
178
def __init__(self, shape, data=None, dtype=None, fill_value=None): ...
179
180
@classmethod
181
def from_scipy_sparse(cls, x, /, *, fill_value=None):
182
"""Create DOK array from SciPy sparse matrix"""
183
184
@classmethod
185
def from_coo(cls, x):
186
"""Create DOK array from COO array"""
187
188
@classmethod
189
def from_numpy(cls, x):
190
"""Create DOK array from NumPy array"""
191
192
@property
193
def format(self): ... # Returns "dok"
194
@property
195
def nbytes(self): ... # Memory usage in bytes
196
197
def to_coo(self):
198
"""Convert to COO format"""
199
200
def asformat(self, format, **kwargs):
201
"""Convert to different sparse format"""
202
203
def reshape(self, shape, order="C"):
204
"""Return reshaped array"""
205
206
def __getitem__(self, key):
207
"""Get item with fancy indexing support"""
208
209
def __setitem__(self, key, value):
210
"""Set item with fancy indexing support"""
211
```
212
213
### GCXS (Generalized Compressed Sparse)
214
215
Generalized compressed sparse format that encompasses CSR (Compressed Sparse Row) and CSC (Compressed Sparse Column) formats for memory-efficient storage.
216
217
```python { .api }
218
class GCXS(SparseArray):
219
"""
220
Generalized Compressed Sparse format.
221
222
Compressed storage format that generalizes CSR/CSC to arbitrary dimensions.
223
Memory-efficient for:
224
- Matrix-vector operations
225
- Row or column-wise access patterns
226
- Large sparse matrices with structured sparsity
227
228
Parameters:
229
- arg: input data (COO array, tuple of arrays, or existing GCXS)
230
- shape: tuple, shape of the array
231
- compressed_axes: tuple, axes to compress
232
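- prune: bool, whether to remove explicit zeros
- fill_value: default value for unspecified entries
- idx_dtype: data type used for the index arrays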
233
"""
234
235
def __init__(self, arg, shape=None, compressed_axes=None, prune=False, fill_value=None, idx_dtype=None): ...
236
237
@classmethod
238
def from_numpy(cls, x, compressed_axes=None, fill_value=None, idx_dtype=None):
239
"""Create GCXS from NumPy array"""
240
241
@classmethod
242
def from_coo(cls, x, compressed_axes=None, idx_dtype=None):
243
"""Create GCXS from COO array"""
244
245
@classmethod
246
def from_scipy_sparse(cls, x, /, *, fill_value=None):
247
"""Create GCXS from SciPy sparse matrix"""
248
249
@classmethod
250
def from_iter(cls, x, shape=None, compressed_axes=None, fill_value=None, idx_dtype=None):
251
"""Create GCXS from iterable"""
252
253
@property
254
def format(self): ... # Returns "gcxs"
255
@property
256
def nbytes(self): ... # Memory usage in bytes
257
@property
258
def compressed_axes(self): ... # Compressed axis dimensions
259
@property
260
def T(self): ... # Transpose property
261
@property
262
def mT(self): ... # Matrix transpose (last 2 dims)
263
264
def copy(self, deep=True):
265
"""Create copy of array"""
266
267
def change_compressed_axes(self, new_compressed_axes):
268
"""Change compression axes"""
269
270
def tocoo(self):
271
"""Convert to COO format"""
272
273
def todok(self):
274
"""Convert to DOK format"""
275
276
def to_scipy_sparse(self, accept_fv=None):
277
"""Convert to SciPy sparse matrix"""
278
279
def asformat(self, format, **kwargs):
280
"""Convert to different sparse format"""
281
282
def maybe_densify(self, max_size=1000, min_density=0.25):
283
"""Conditional densification"""
284
285
def flatten(self, order="C"):
286
"""Flatten array"""
287
288
def reshape(self, shape, order="C", compressed_axes=None):
289
"""Reshape array"""
290
291
def transpose(self, axes=None, compressed_axes=None):
292
"""Return transposed array"""
293
294
def dot(self, other):
295
"""Dot product with another array"""
296
```
297
298
## Usage Examples
299
300
### Creating Sparse Arrays
301
302
```python
303
import sparse
304
import numpy as np
305
306
# Create COO from coordinates and data
307
coords = [[0, 1, 2], [0, 2, 1]] # row, col indices
308
data = [1.0, 2.0, 3.0] # values
309
shape = (3, 3)
310
coo_array = sparse.COO(coords, data, shape)
311
312
# Create COO from dense array
313
dense = np.array([[1, 0, 0], [0, 2, 0], [0, 3, 0]])
314
coo_from_dense = sparse.COO.from_numpy(dense)
315
316
# Create DOK for incremental construction
317
dok_array = sparse.DOK((100, 100), dtype=float)
318
dok_array[10, 20] = 5.0
319
dok_array[50, 80] = -2.5
320
321
# Convert DOK to COO for operations
322
coo_from_dok = dok_array.to_coo()
323
```
324
325
### Array Properties and Conversion
326
327
```python
328
print(f"Shape: {coo_array.shape}") # (3, 3)
329
print(f"Non-zeros: {coo_array.nnz}") # 3
330
print(f"Density: {coo_array.density:.1%}") # 33.3%
331
print(f"Data type: {coo_array.dtype}") # float64
332
333
# Convert between formats
334
dense_result = coo_array.todense() # To NumPy array
335
gcxs_array = sparse.GCXS.from_coo(coo_array) # To compressed format
336
```
337
338
## Format Selection Guidelines
339
340
- **COO**: General-purpose format, best for arithmetic operations and initial array creation
341
- **DOK**: Best for incremental construction when you need to set individual elements
342
- **GCXS**: Most memory-efficient for large arrays, optimal for linear algebra operations
343
- **Conversion**: Arrays can be converted between formats as needed using `.asformat()`, or with format-specific methods such as `.tocoo()` (GCXS), `.to_coo()` (DOK), and `.todense()`.