or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-creation.md, cuda-integration.md, custom-kernels.md, data-types.md, extended-functionality.md, fft.md, index.md, io-functions.md, linear-algebra.md, logic-functions.md, mathematical-functions.md, polynomial.md, random.md, statistics.md, utilities.md

docs/statistics.md

0

# Statistics and Sorting

1

2

Statistical functions, sorting algorithms, and searching operations for data analysis and processing. All functions are GPU-accelerated and maintain NumPy compatibility.

3

4

## Capabilities

5

6

### Descriptive Statistics

7

8

Functions for computing statistical measures of data distributions.

9

10

```python { .api }

11

def mean(a, axis=None, dtype=None, out=None, keepdims=False):

12

"""

13

Compute arithmetic mean along specified axis.

14

15

Parameters:

16

- a: array-like, input array

17

- axis: None or int or tuple, axis to compute mean over

18

- dtype: data type, output data type

19

- out: cupy.ndarray, output array

20

- keepdims: bool, keep reduced dimensions

21

22

Returns:

23

cupy.ndarray: Mean values

24

"""

25

26

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

27

"""

28

Compute standard deviation along specified axis.

29

30

Parameters:

31

- a: array-like, input array

32

- axis: None or int or tuple, axis to compute std over

33

- dtype: data type, output data type

34

- out: cupy.ndarray, output array

35

- ddof: int, delta degrees of freedom

36

- keepdims: bool, keep reduced dimensions

37

38

Returns:

39

cupy.ndarray: Standard deviation

40

"""

41

42

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

43

"""Compute variance along specified axis."""

44

45

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):

46

"""

47

Compute median along specified axis.

48

49

Parameters:

50

- a: array-like, input array

51

- axis: None or int or tuple, axis to compute median over

52

- out: cupy.ndarray, output array

53

- overwrite_input: bool, whether input can be overwritten

54

- keepdims: bool, keep reduced dimensions

55

56

Returns:

57

cupy.ndarray: Median values

58

"""

59

60

def average(a, axis=None, weights=None, returned=False):

61

"""

62

Compute weighted average along specified axis.

63

64

Parameters:

65

- a: array-like, input array

66

- axis: None or int or tuple, axis to average over

67

- weights: array-like, weights for each value

68

- returned: bool, return tuple (average, sum_of_weights)

69

70

Returns:

71

cupy.ndarray: Weighted average

72

tuple: (average, sum_of_weights) if returned=True

73

"""

74

```

75

76

### Order Statistics

77

78

Functions for finding minimum, maximum, and quantile values.

79

80

```python { .api }

81

def amax(a, axis=None, out=None, keepdims=False, initial=None, where=None):

82

"""

83

Return maximum of array or maximum along axis.

84

85

Parameters:

86

- a: array-like, input array

87

- axis: None or int or tuple, axis for maximum

88

- out: cupy.ndarray, output array

89

- keepdims: bool, keep reduced dimensions

90

- initial: scalar, minimum value of output

91

- where: array-like, elements to include

92

93

Returns:

94

cupy.ndarray: Maximum values

95

"""

96

97

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=None):

98

"""Return minimum of array or minimum along axis."""

99

100

def max(a, axis=None, out=None, keepdims=False, initial=None, where=None):

101

"""Alias for amax."""

102

103

def min(a, axis=None, out=None, keepdims=False, initial=None, where=None):

104

"""Alias for amin."""

105

106

def ptp(a, axis=None, out=None, keepdims=False):

107

"""

108

Return range (maximum - minimum) along axis.

109

110

Returns:

111

cupy.ndarray: Peak-to-peak values

112

"""

113

114

def percentile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):

115

"""

116

Compute qth percentile along specified axis.

117

118

Parameters:

119

- a: array-like, input array

120

- q: float or sequence, percentile(s) to compute (0-100)

121

- axis: None or int or tuple, axis to compute over

122

- out: cupy.ndarray, output array

123

- overwrite_input: bool, whether input can be overwritten

124

- interpolation: str, interpolation method

125

- keepdims: bool, keep reduced dimensions

126

127

Returns:

128

cupy.ndarray: Percentile values

129

"""

130

131

def quantile(a, q, axis=None, out=None, overwrite_input=False, interpolation='linear', keepdims=False):

132

"""

133

Compute qth quantile along specified axis.

134

135

Parameters:

136

- q: float or sequence, quantile(s) to compute (0-1)

137

138

Returns:

139

cupy.ndarray: Quantile values

140

"""

141

```

142

143

### Sorting Functions

144

145

Efficient GPU-accelerated sorting algorithms.

146

147

```python { .api }

148

def sort(a, axis=-1, kind=None, order=None):

149

"""

150

Return sorted copy of array.

151

152

Parameters:

153

- a: array-like, input array

154

- axis: int or None, axis to sort along

155

- kind: str, sorting algorithm (ignored, uses GPU-optimized method)

156

- order: str or list, field order for structured arrays

157

158

Returns:

159

cupy.ndarray: Sorted array

160

"""

161

162

def argsort(a, axis=-1, kind=None, order=None):

163

"""

164

Return indices that would sort array.

165

166

Returns:

167

cupy.ndarray: Indices that sort a along specified axis

168

"""

169

170

def lexsort(keys, axis=-1):

171

"""

172

Perform indirect stable sort using multiple keys.

173

174

Parameters:

175

- keys: tuple of arrays, sort keys (last key is primary)

176

- axis: int, axis to sort along

177

178

Returns:

179

cupy.ndarray: Indices that sort the keys

180

"""

181

182

def msort(a):

183

"""Sort array along first axis."""

184

185

def sort_complex(a):

186

"""Sort complex array using real part first, then imaginary."""

187

188

def partition(a, kth, axis=-1, kind='introselect', order=None):

189

"""

190

Return partitioned copy where kth element is in final sorted position.

191

192

Parameters:

193

- a: array-like, input array

194

- kth: int or sequence, element index(es) for partitioning

195

- axis: int or None, axis to partition along

196

- kind: str, selection algorithm

197

- order: str or list, field order for structured arrays

198

199

Returns:

200

cupy.ndarray: Partitioned array

201

"""

202

203

def argpartition(a, kth, axis=-1, kind='introselect', order=None):

204

"""Return indices that partition array."""

205

```

206

207

### Searching Functions

208

209

Functions for finding elements in arrays.

210

211

```python { .api }

212

def argmax(a, axis=None, out=None):

213

"""

214

Return indices of maximum values along axis.

215

216

Parameters:

217

- a: array-like, input array

218

- axis: int or None, axis to find maximum along

219

- out: cupy.ndarray, output array

220

221

Returns:

222

cupy.ndarray: Indices of maximum values

223

"""

224

225

def argmin(a, axis=None, out=None):

226

"""Return indices of minimum values along axis."""

227

228

def nonzero(a):

229

"""

230

Return indices of non-zero elements.

231

232

Returns:

233

tuple: Tuple of arrays, one for each dimension

234

"""

235

236

def where(condition, x=None, y=None):

237

"""

238

Return elements chosen from x or y depending on condition.

239

240

Parameters:

241

- condition: array-like, boolean condition

242

- x, y: array-like, values to choose from

243

244

Returns:

245

cupy.ndarray: Elements from x where condition is True, y elsewhere

246

tuple: If x and y are not given, equivalent to nonzero(condition)

247

"""

248

249

def searchsorted(a, v, side='left', sorter=None):

250

"""

251

Find indices where elements should be inserted to maintain order.

252

253

Parameters:

254

- a: array-like, sorted input array

255

- v: array-like, values to insert

256

- side: {'left', 'right'}, insertion side for equal values

257

- sorter: array-like, indices that sort a

258

259

Returns:

260

cupy.ndarray: Insertion indices

261

"""

262

```

263

264

### Histograms

265

266

Functions for computing histograms and frequency distributions.

267

268

```python { .api }

269

def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):

270

"""

271

Compute histogram of array.

272

273

Parameters:

274

- a: array-like, input data

275

- bins: int or sequence, bin specification

276

- range: tuple, range of bins

277

- normed: bool, deprecated, use density

278

- weights: array-like, weights for each value

279

- density: bool, return probability density

280

281

Returns:

282

tuple: (hist, bin_edges)

283

"""

284

285

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):

286

"""

287

Compute 2D histogram of two arrays.

288

289

Parameters:

290

- x, y: array-like, input data

291

- bins: int or sequence, bin specification

292

- range: array-like, bin ranges

293

- normed: bool, deprecated, use density

294

- weights: array-like, weights for each value

295

- density: bool, return probability density

296

297

Returns:

298

tuple: (H, xedges, yedges)

299

"""

300

301

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):

302

"""Compute multidimensional histogram."""

303

304

def bincount(x, weights=None, minlength=0):

305

"""

306

Count occurrences of each value in array of non-negative integers.

307

308

Parameters:

309

- x: array-like, input array of non-negative integers

310

- weights: array-like, weights for each value

311

- minlength: int, minimum number of bins

312

313

Returns:

314

cupy.ndarray: Number of occurrences of each value

315

"""

316

317

def digitize(x, bins, right=False):

318

"""

319

Return indices of bins to which each value belongs.

320

321

Parameters:

322

- x: array-like, input array

323

- bins: array-like, bin edges (monotonic)

324

- right: bool, whether intervals include right edge

325

326

Returns:

327

cupy.ndarray: Bin indices for each value in x

328

"""

329

```

330

331

### Correlations

332

333

Functions for computing correlations and covariances.

334

335

```python { .api }

336

def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):

337

"""

338

Return Pearson correlation coefficients.

339

340

Parameters:

341

- x: array-like, input array

342

- y: array-like, additional input array

343

- rowvar: bool, whether rows are variables

344

- bias: deprecated

345

- ddof: deprecated

346

347

Returns:

348

cupy.ndarray: Correlation coefficient matrix

349

"""

350

351

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):

352

"""

353

Estimate covariance matrix.

354

355

Parameters:

356

- m: array-like, input array

357

- y: array-like, additional input array

358

- rowvar: bool, whether rows are variables

359

- bias: bool, use biased estimator

360

- ddof: int, delta degrees of freedom

361

- fweights: array-like, frequency weights

362

- aweights: array-like, observation weights

363

364

Returns:

365

cupy.ndarray: Covariance matrix

366

"""

367

368

def correlate(a, v, mode='valid'):

369

"""

370

Cross-correlation of two 1D sequences.

371

372

Parameters:

373

- a, v: array-like, input sequences

374

- mode: {'valid', 'same', 'full'}, output size

375

376

Returns:

377

cupy.ndarray: Cross-correlation result

378

"""

379

```

380

381

## Usage Examples

382

383

### Basic Statistics

384

385

```python

386

import cupy as cp

387

388

# Generate sample data

389

data = cp.random.normal(10, 2, size=10000)

390

391

# Descriptive statistics

392

mean_val = cp.mean(data)

393

std_val = cp.std(data)

394

var_val = cp.var(data)

395

median_val = cp.median(data)

396

397

# Percentiles

398

q25 = cp.percentile(data, 25)

399

q75 = cp.percentile(data, 75)

400

iqr = q75 - q25

401

402

print(f"Mean: {mean_val:.2f}, Std: {std_val:.2f}")

403

print(f"Median: {median_val:.2f}, IQR: {iqr:.2f}")

404

```

405

406

### Sorting and Searching

407

408

```python

409

import cupy as cp

410

411

# Create test array

412

arr = cp.random.randint(0, 100, size=1000)

413

414

# Sort array

415

sorted_arr = cp.sort(arr)

416

sort_indices = cp.argsort(arr)

417

418

# Find extreme values

419

max_idx = cp.argmax(arr)

420

min_idx = cp.argmin(arr)

421

422

# Search for values

423

search_values = cp.array([25, 50, 75])

424

insertion_points = cp.searchsorted(sorted_arr, search_values)

425

426

# Boolean indexing

427

mask = arr > 50

428

high_values = arr[mask]

429

high_indices = cp.nonzero(mask)[0]

430

```

431

432

### Histograms and Distributions

433

434

```python

435

import cupy as cp

436

437

# Generate data from multiple distributions

438

normal_data = cp.random.normal(0, 1, 5000)

439

uniform_data = cp.random.uniform(-3, 3, 5000)

440

441

# Compute histograms

442

hist_normal, bins_normal = cp.histogram(normal_data, bins=50, density=True)

443

hist_uniform, bins_uniform = cp.histogram(uniform_data, bins=50, density=True)

444

445

# 2D histogram

446

x = cp.random.normal(0, 1, 1000)

447

y = x + cp.random.normal(0, 0.5, 1000) # Correlated data

448

hist_2d, xedges, yedges = cp.histogram2d(x, y, bins=20)

449

```