# Statistics and Sorting

Statistical analysis and sorting operations on GPU arrays. Provides descriptive statistics, correlations, histograms, and sorting, searching, and counting routines, with NaN-aware variants and support for axis-specific operations.

## Capabilities

### Descriptive Statistics

```python { .api }
def mean(a, axis=None, dtype=None, out=None, keepdims=False):
    """
    Compute arithmetic mean along specified axis.

    Parameters:
    - a: input array
    - axis: axis or axes along which to compute mean
    - dtype: data type for computation
    - out: output array
    - keepdims: keep dimensions of original array

    Returns:
    cupy.ndarray: arithmetic mean
    """

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """
    Compute standard deviation along specified axis.

    Parameters:
    - a: input array
    - axis: axis or axes along which to compute std
    - dtype: data type for computation
    - out: output array
    - ddof: degrees of freedom correction
    - keepdims: keep dimensions

    Returns:
    cupy.ndarray: standard deviation
    """

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Compute variance along specified axis."""

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """Compute median along specified axis."""

def average(a, axis=None, weights=None, returned=False):
    """
    Compute weighted average along specified axis.

    Parameters:
    - a: input array
    - axis: axis along which to average
    - weights: weights for averaging
    - returned: return sum of weights

    Returns:
    cupy.ndarray or tuple: weighted average
    """

def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):
    """Compute mean ignoring NaNs."""

def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Compute standard deviation ignoring NaNs."""

def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):
    """Compute variance ignoring NaNs."""

def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):
    """Compute median ignoring NaNs."""
```
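
A minimal sketch of the weighted and NaN-aware variants (the array contents below are arbitrary example values):

```python
import cupy as cp

values = cp.array([1.0, 2.0, 3.0, 4.0])
weights = cp.array([4.0, 3.0, 2.0, 1.0])

# Weighted average; returned=True also yields the sum of the weights
avg, wsum = cp.average(values, weights=weights, returned=True)
print(float(avg), float(wsum))     # 2.0 10.0

# NaN-aware median skips missing entries instead of propagating NaN
noisy = cp.array([1.0, cp.nan, 3.0, 5.0])
print(float(cp.nanmedian(noisy)))  # 3.0
```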

### Order Statistics

```python { .api }
def amin(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Return minimum values along axis."""

def amax(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Return maximum values along axis."""

def min(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Alias for amin."""

def max(a, axis=None, out=None, keepdims=False, initial=None, where=True):
    """Alias for amax."""

def nanmin(a, axis=None, out=None, keepdims=False):
    """Return minimum values ignoring NaNs."""

def nanmax(a, axis=None, out=None, keepdims=False):
    """Return maximum values ignoring NaNs."""

def ptp(a, axis=None, out=None, keepdims=False):
    """
    Range of values (maximum - minimum) along axis.

    Parameters:
    - a: input array
    - axis: axis along which to compute range
    - out: output array
    - keepdims: keep dimensions

    Returns:
    cupy.ndarray: peak-to-peak values
    """

def percentile(a, q, axis=None, out=None, overwrite_input=False,
               method='linear', keepdims=False):
    """
    Compute qth percentile along specified axis.

    Parameters:
    - a: input array
    - q: percentile(s) to compute
    - axis: axis along which to compute percentiles
    - out: output array
    - overwrite_input: allow input modification
    - method: interpolation method
    - keepdims: keep dimensions

    Returns:
    cupy.ndarray: qth percentiles
    """

def quantile(a, q, axis=None, out=None, overwrite_input=False,
             method='linear', keepdims=False):
    """Compute quantiles along specified axis."""
```
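
A short sketch of axis-specific order statistics (assuming a small example array built with cp.arange):

```python
import cupy as cp

data = cp.arange(12, dtype=cp.float64).reshape(3, 4)

# Per-row range (max - min); keepdims=True keeps the reduced axis
row_range = cp.ptp(data, axis=1, keepdims=True)         # shape (3, 1)

# q may be a sequence; the percentile axis is prepended to the result
quartiles = cp.percentile(data, [25, 50, 75], axis=0)   # shape (3, 4)

print(row_range.shape, quartiles.shape)
```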

### Correlations

```python { .api }
def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):
    """
    Return Pearson correlation coefficients.

    Parameters:
    - x: input array
    - y: additional input array
    - rowvar: whether rows represent variables
    - bias: bias correction (deprecated)
    - ddof: degrees of freedom (deprecated)

    Returns:
    cupy.ndarray: correlation coefficient matrix
    """

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):
    """
    Estimate covariance matrix.

    Parameters:
    - m: input array
    - y: additional input array
    - rowvar: whether rows represent variables
    - bias: use biased estimator
    - ddof: degrees of freedom correction
    - fweights: frequency weights
    - aweights: analytic weights

    Returns:
    cupy.ndarray: covariance matrix
    """

def correlate(a, v, mode='valid'):
    """
    Cross-correlation of two 1-dimensional sequences.

    Parameters:
    - a: first input sequence
    - v: second input sequence
    - mode: output size ('full', 'valid', 'same')

    Returns:
    cupy.ndarray: cross-correlation
    """
```
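
The usage examples later in this document treat rows as variables; rowvar=False flips that convention, which is convenient when each observation is stored as a row. A brief sketch:

```python
import cupy as cp

# 200 observations of 3 variables, one column per variable
obs = cp.random.standard_normal((200, 3))

# rowvar=False treats columns (not rows) as variables
corr = cp.corrcoef(obs, rowvar=False)  # (3, 3) correlation matrix
cov = cp.cov(obs, rowvar=False)        # (3, 3) covariance matrix

print(corr.shape, cov.shape)
```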

### Histograms

```python { .api }
def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute histogram of a set of data.

    Parameters:
    - a: input data
    - bins: number of bins or bin edges
    - range: lower and upper range of bins
    - normed: normalize histogram (deprecated)
    - weights: weights for each value
    - density: normalize to create probability density

    Returns:
    tuple: (hist, bin_edges)
    """

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute 2D histogram of two data samples.

    Parameters:
    - x, y: input data arrays
    - bins: number of bins or bin edges
    - range: array of ranges for each dimension
    - normed: normalize histogram (deprecated)
    - weights: weights for each sample
    - density: normalize to create probability density

    Returns:
    tuple: (H, xedges, yedges)
    """

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):
    """
    Compute multidimensional histogram.

    Parameters:
    - sample: input data array
    - bins: number of bins for each dimension
    - range: sequence of ranges for each dimension
    - normed: normalize histogram (deprecated)
    - weights: weights for each sample
    - density: normalize to create probability density

    Returns:
    tuple: (H, edges)
    """

def bincount(x, weights=None, minlength=0):
    """
    Count occurrences of each value in array of non-negative ints.

    Parameters:
    - x: input array of non-negative integers
    - weights: weights for each value
    - minlength: minimum number of bins

    Returns:
    cupy.ndarray: counts for each value
    """

def digitize(x, bins, right=False):
    """
    Return indices of bins to which each value belongs.

    Parameters:
    - x: input array
    - bins: array of bins
    - right: whether intervals include right edge

    Returns:
    cupy.ndarray: bin indices
    """
```
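
A small sketch of the multidimensional histogram and of the right-edge handling in digitize (the sample sizes and bin counts are arbitrary):

```python
import cupy as cp

# Multidimensional histogram: 1000 points with 3 coordinates each
sample = cp.random.random((1000, 3))
H, edges = cp.histogramdd(sample, bins=(8, 8, 8))
print(H.shape, len(edges))  # (8, 8, 8) 3

# With right=True the bin intervals are closed on the right edge
x = cp.array([0.0, 0.25, 0.5, 1.0])
bins = cp.array([0.0, 0.5, 1.0])
print(cp.digitize(x, bins, right=True))  # [0 1 1 2]
```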

### Sorting

```python { .api }
def sort(a, axis=-1, kind=None, order=None):
    """
    Return sorted copy of array.

    Parameters:
    - a: input array
    - axis: axis along which to sort
    - kind: sorting algorithm (ignored, uses merge sort)
    - order: field order for structured arrays

    Returns:
    cupy.ndarray: sorted array
    """

def argsort(a, axis=-1, kind=None, order=None):
    """
    Return indices that would sort array.

    Parameters:
    - a: input array
    - axis: axis along which to sort
    - kind: sorting algorithm
    - order: field order for structured arrays

    Returns:
    cupy.ndarray: indices for sorted array
    """

def lexsort(keys, axis=-1):
    """
    Perform indirect stable sort using multiple keys.

    Parameters:
    - keys: sequence of arrays to use as sort keys
    - axis: axis along which to sort

    Returns:
    cupy.ndarray: indices for lexicographically sorted array
    """

def msort(a):
    """
    Return sorted copy along first axis.

    Parameters:
    - a: input array

    Returns:
    cupy.ndarray: sorted array
    """

def sort_complex(a):
    """
    Sort complex array using real part first, then imaginary part.

    Parameters:
    - a: input complex array

    Returns:
    cupy.ndarray: sorted complex array
    """

def partition(a, kth, axis=-1, kind='introselect', order=None):
    """
    Return partitioned copy where kth element is in correct position.

    Parameters:
    - a: input array
    - kth: element index for partitioning
    - axis: axis along which to partition
    - kind: partitioning algorithm
    - order: field order for structured arrays

    Returns:
    cupy.ndarray: partitioned array
    """

def argpartition(a, kth, axis=-1, kind='introselect', order=None):
    """Return indices that would partition array."""
```
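
A brief sketch of sort_complex and of a top-k selection via argpartition:

```python
import cupy as cp

# sort_complex orders by real part, then by imaginary part
z = cp.array([2 + 1j, 1 + 3j, 1 + 1j])
print(cp.sort_complex(z))  # [1.+1.j 1.+3.j 2.+1.j]

# argpartition gives the indices of the k smallest values (in no particular order)
scores = cp.random.random(1000)
k = 5
smallest_idx = cp.argpartition(scores, k)[:k]
print(cp.sort(scores[smallest_idx]))  # the 5 smallest scores, ascending
```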

### Searching

```python { .api }
def argmax(a, axis=None, out=None):
    """
    Return indices of maximum values along axis.

    Parameters:
    - a: input array
    - axis: axis along which to search
    - out: output array

    Returns:
    cupy.ndarray: indices of maximum values
    """

def argmin(a, axis=None, out=None):
    """Return indices of minimum values along axis."""

def nanargmax(a, axis=None):
    """Return indices of maximum values ignoring NaNs."""

def nanargmin(a, axis=None):
    """Return indices of minimum values ignoring NaNs."""

def argwhere(a):
    """
    Find indices of array elements that are non-zero.

    Parameters:
    - a: input array

    Returns:
    cupy.ndarray: indices of non-zero elements
    """

def nonzero(a):
    """
    Return indices of elements that are non-zero.

    Parameters:
    - a: input array

    Returns:
    tuple: arrays of indices
    """

def flatnonzero(a):
    """Return indices of flattened array that are non-zero."""

def where(condition, x=None, y=None):
    """
    Return elements chosen from x or y depending on condition.

    Parameters:
    - condition: boolean array
    - x: values where condition is True
    - y: values where condition is False

    Returns:
    cupy.ndarray: array with elements from x or y
    """

def searchsorted(a, v, side='left', sorter=None):
    """
    Find indices where elements should be inserted to maintain order.

    Parameters:
    - a: sorted input array
    - v: values to insert
    - side: insertion side ('left' or 'right')
    - sorter: array of indices that sort a

    Returns:
    cupy.ndarray: insertion indices
    """
```
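
A short sketch of conditional selection and non-zero lookups on a small example array:

```python
import cupy as cp

a = cp.array([[0.0, 1.5, 0.0],
              [2.0, 0.0, 3.0]])

# Keep positive entries, replace the rest with -1
masked = cp.where(a > 0, a, -1.0)

# Non-zero indices, per axis and flattened
rows, cols = cp.nonzero(a)
flat = cp.flatnonzero(a)

print(masked)
print(rows, cols)  # [0 1 1] [1 0 2]
print(flat)        # [1 3 5]
```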

### Counting

```python { .api }
def count_nonzero(a, axis=None, keepdims=False):
    """
    Count number of non-zero values in array.

    Parameters:
    - a: input array
    - axis: axis or axes to count along
    - keepdims: keep dimensions of original array

    Returns:
    int or cupy.ndarray: count of non-zero values
    """
```
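
A minimal sketch of counting totals and per-axis counts:

```python
import cupy as cp

m = cp.array([[1, 0, 2],
              [0, 0, 3]])

print(int(cp.count_nonzero(m)))     # total: 3
print(cp.count_nonzero(m, axis=0))  # per column: [1 0 2]
print(cp.count_nonzero(m, axis=1))  # per row: [2 1]
```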

## Usage Examples

### Basic Statistics

```python
import cupy as cp

# Create sample data
data = cp.random.normal(10, 2, (1000, 50))

# Compute basic statistics
mean_val = cp.mean(data)
std_val = cp.std(data)
var_val = cp.var(data)
median_val = cp.median(data)

# float() pulls the 0-d GPU results back to the host for formatting
print(f"Mean: {float(mean_val):.4f}")
print(f"Std: {float(std_val):.4f}")
print(f"Variance: {float(var_val):.4f}")
print(f"Median: {float(median_val):.4f}")

# Statistics along specific axis
row_means = cp.mean(data, axis=1)  # Mean of each row
col_stds = cp.std(data, axis=0)    # Std of each column

print(f"Row means shape: {row_means.shape}")
print(f"Column stds shape: {col_stds.shape}")
```

### Order Statistics

```python
import cupy as cp

# Create test data
data = cp.random.random((100, 100))

# Find min/max values
min_val = cp.min(data)
max_val = cp.max(data)
range_val = cp.ptp(data)  # peak-to-peak

print(f"Min: {float(min_val):.4f}")
print(f"Max: {float(max_val):.4f}")
print(f"Range: {float(range_val):.4f}")

# Percentiles
percentiles = cp.percentile(data, [25, 50, 75, 90, 95])
print(f"Percentiles (25, 50, 75, 90, 95): {percentiles}")

# Quantiles (same as percentiles, but on a 0-1 scale)
quantiles = cp.quantile(data, [0.25, 0.5, 0.75])
print(f"Quantiles (0.25, 0.5, 0.75): {quantiles}")
```

### Handling NaN Values

```python
import cupy as cp

# Create data with NaN values
data = cp.random.random((100, 100))
data[cp.random.random((100, 100)) < 0.1] = cp.nan  # roughly 10% NaN values

# Regular statistics (return NaN if any NaN is present)
regular_mean = cp.mean(data)
regular_std = cp.std(data)

# NaN-aware statistics
nan_mean = cp.nanmean(data)
nan_std = cp.nanstd(data)
nan_min = cp.nanmin(data)
nan_max = cp.nanmax(data)

print(f"Regular mean: {regular_mean}")
print(f"NaN-aware mean: {float(nan_mean):.4f}")
print(f"NaN-aware std: {float(nan_std):.4f}")
print(f"NaN-aware range: {float(nan_min):.4f} to {float(nan_max):.4f}")
```

### Correlation Analysis

```python
import cupy as cp

# Create correlated data
n_samples = 1000
x = cp.random.normal(0, 1, n_samples)
y = 0.8 * x + 0.6 * cp.random.normal(0, 1, n_samples)  # Correlated with x
z = cp.random.normal(0, 1, n_samples)                   # Independent

# Stack into matrix (each row is a variable)
data = cp.stack([x, y, z])

# Compute correlation matrix
corr_matrix = cp.corrcoef(data)
print("Correlation matrix:")
print(corr_matrix)

# Compute covariance matrix
cov_matrix = cp.cov(data)
print("\nCovariance matrix:")
print(cov_matrix)

# Cross-correlation of two sequences
x_seq = cp.random.random(100)
y_seq = cp.random.random(100)
cross_corr = cp.correlate(x_seq, y_seq, mode='full')
print(f"\nCross-correlation shape: {cross_corr.shape}")
```
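
The mode argument controls the output length of correlate. A quick sketch comparing the three modes on same-length inputs:

```python
import cupy as cp

a = cp.random.random(100)
v = cp.random.random(100)

# full -> n+m-1, same -> max(n, m), valid -> max(n, m) - min(n, m) + 1
for mode in ('full', 'same', 'valid'):
    print(mode, cp.correlate(a, v, mode=mode).shape)  # shapes: (199,), (100,), (1,)
```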

### Histograms

```python
import cupy as cp

# Create sample data
data = cp.random.normal(0, 1, 10000)

# 1D histogram
hist, bin_edges = cp.histogram(data, bins=50, range=(-4, 4))
print(f"Histogram shape: {hist.shape}")
print(f"Bin edges shape: {bin_edges.shape}")

# Weighted histogram
weights = cp.random.random(len(data))
weighted_hist, _ = cp.histogram(data, bins=50, weights=weights)

# 2D histogram
x = cp.random.normal(0, 1, 5000)
y = cp.random.normal(0, 1, 5000)
hist_2d, x_edges, y_edges = cp.histogram2d(x, y, bins=30)
print(f"2D histogram shape: {hist_2d.shape}")

# Count occurrences
integers = cp.random.randint(0, 10, 1000)
counts = cp.bincount(integers)
print(f"Counts: {counts}")

# Digitize continuous data
bin_indices = cp.digitize(data, bins=cp.linspace(-3, 3, 10))
print(f"Bin indices range: {cp.min(bin_indices)} to {cp.max(bin_indices)}")
```
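
With density=True the histogram is normalized so that it integrates to one. A short check (a sketch, reusing a normal sample as above):

```python
import cupy as cp

data = cp.random.normal(0, 1, 10000)
dens, edges = cp.histogram(data, bins=50, density=True)

# The density values times the bin widths should sum to ~1.0
print(float(cp.sum(dens * cp.diff(edges))))
```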

### Sorting Operations

```python
import cupy as cp

# Create unsorted data
data = cp.random.random((5, 10))

# Sort array
sorted_data = cp.sort(data, axis=1)  # Sort each row
print("Original data (first row):")
print(data[0])
print("Sorted data (first row):")
print(sorted_data[0])

# Get sorting indices
sort_indices = cp.argsort(data, axis=1)
print("Sort indices (first row):")
print(sort_indices[0])

# Verify sorting
reconstructed = data[0, sort_indices[0]]
print("Reconstructed (should match sorted):")
print(reconstructed)

# Multi-dimensional sort
data_3d = cp.random.random((10, 20, 30))
sorted_3d = cp.sort(data_3d, axis=2)  # Sort along last axis
```

### Advanced Sorting

```python
import cupy as cp

# Lexicographic sorting: sort by multiple keys (here by y first, then by x)
x = cp.array([1, 3, 2, 1, 3, 2])
y = cp.array([3, 1, 2, 1, 3, 1])

# Keys are stacked into a (k, N) array; the last key (y) is the primary key
lex_indices = cp.lexsort(cp.stack([x, y]))
print("Lexsort indices:", lex_indices)
print("x sorted:", x[lex_indices])
print("y sorted:", y[lex_indices])

# Partial sorting (partition)
large_array = cp.random.random(1000)
k = 100  # Find the 100 smallest elements

# Partition so that the k smallest elements occupy the first k positions
partitioned = cp.partition(large_array, k - 1)
print(f"100th smallest element: {partitioned[k - 1]}")
print(f"Verification - max of first 100: {cp.max(partitioned[:k])}")
print(f"Verification - min of last 900: {cp.min(partitioned[k:])}")
```

### Search Operations

```python
import cupy as cp

# Create test data
data = cp.random.random((50, 50))

# Find locations of extreme values
max_pos = cp.argmax(data)
min_pos = cp.argmin(data)

# Convert flat indices to 2D coordinates
max_coords = cp.unravel_index(max_pos, data.shape)
min_coords = cp.unravel_index(min_pos, data.shape)

print(f"Max value {float(cp.max(data)):.4f} at position {max_coords}")
print(f"Min value {float(cp.min(data)):.4f} at position {min_coords}")

# Find all positions above a threshold
threshold = 0.9
high_positions = cp.argwhere(data > threshold)
print(f"Found {len(high_positions)} positions above {threshold}")

# Search in a sorted array
sorted_array = cp.sort(cp.random.random(1000))
values_to_find = cp.array([0.1, 0.5, 0.9])
insertion_points = cp.searchsorted(sorted_array, values_to_find)
print(f"Insertion points: {insertion_points}")

# Count non-zero elements
sparse_data = cp.random.random((100, 100))
sparse_data[sparse_data < 0.9] = 0  # roughly 90% zeros
nonzero_count = cp.count_nonzero(sparse_data)
print(f"Non-zero elements: {nonzero_count} out of {sparse_data.size}")
```

### Performance Comparison

```python
import time

import cupy as cp
import numpy as np

# Large dataset for performance testing
n = 10**7
data_gpu = cp.random.random(n)
data_cpu = cp.asnumpy(data_gpu)

# GPU sorting
start = time.time()
sorted_gpu = cp.sort(data_gpu)
cp.cuda.Device().synchronize()
gpu_time = time.time() - start

# CPU sorting
start = time.time()
sorted_cpu = np.sort(data_cpu)
cpu_time = time.time() - start

print(f"GPU sort time: {gpu_time:.4f}s")
print(f"CPU sort time: {cpu_time:.4f}s")
print(f"Speedup: {cpu_time / gpu_time:.2f}x")

# Verify correctness
gpu_result_cpu = cp.asnumpy(sorted_gpu)
max_diff = np.max(np.abs(gpu_result_cpu - sorted_cpu))
print(f"Max difference: {max_diff}")
```
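
Timing numbers like these can be skewed by one-time setup costs and by work still queued on the device. A warm-up run and a synchronization before starting the clock (a sketch using only the calls shown above) give a steadier measurement:

```python
import time

import cupy as cp

data_gpu = cp.random.random(10**7)

cp.sort(data_gpu)               # warm-up run: absorbs one-time setup costs
cp.cuda.Device().synchronize()  # drain any pending work before timing

start = time.time()
sorted_gpu = cp.sort(data_gpu)
cp.cuda.Device().synchronize()  # wait for the sort to finish before stopping the clock
print(f"GPU sort time: {time.time() - start:.4f}s")
```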