or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-creation.md cuda-management.md fft.md index.md kernels.md linear-algebra.md math-functions.md random.md scipy-extensions.md sparse.md statistics.md

docs/statistics.md

0

# Statistics and Data Analysis

1

2

Statistical functions for data analysis including descriptive statistics, correlations, and histograms. All functions operate on GPU arrays and follow the same interface as NumPy; the reduction functions (mean, std, percentile, etc.) additionally support axis-wise operation.

3

4

## Capabilities

5

6

### Descriptive Statistics

7

8

Basic statistical measures for data analysis.

9

10

```python { .api }

11

def mean(a, axis=None, dtype=None, out=None, keepdims=False):

12

"""

13

Compute arithmetic mean along specified axes.

14

15

Parameters:

16

- a: array-like, input data

17

- axis: None or int or tuple of ints, axes to compute mean over

18

- dtype: data type, type of output

19

- out: cupy.ndarray, output array

20

- keepdims: bool, keep reduced dimensions as size 1

21

22

Returns:

23

cupy.ndarray: Mean values

24

"""

25

26

def std(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

27

"""Compute standard deviation along specified axes."""

28

29

def var(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

30

"""Compute variance along specified axes."""

31

32

def median(a, axis=None, out=None, overwrite_input=False, keepdims=False):

33

"""Compute median along specified axes."""

34

35

def average(a, axis=None, weights=None, returned=False):

36

"""

37

Compute weighted average along specified axis.

38

39

Parameters:

40

- a: array-like, input data

41

- axis: None or int, axis to average over

42

- weights: array-like, weights for averaging

43

- returned: bool, if True return the tuple (average, sum_of_weights) instead of only the average

44

45

Returns:

46

cupy.ndarray: Weighted average

47

tuple: (average, sum_of_weights) if returned=True

48

"""

49

```

50

51

### Order Statistics

52

53

Statistical measures based on data ordering.

54

55

```python { .api }

56

def amax(a, axis=None, out=None, keepdims=False, initial=None, where=None):

57

"""Return maximum along axes."""

58

59

def amin(a, axis=None, out=None, keepdims=False, initial=None, where=None):

60

"""Return minimum along axes."""

61

62

def max(a, axis=None, out=None, keepdims=False, initial=None, where=None):

63

"""Return maximum along axes (alias for amax)."""

64

65

def min(a, axis=None, out=None, keepdims=False, initial=None, where=None):

66

"""Return minimum along axes (alias for amin)."""

67

68

def percentile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):

69

"""

70

Compute percentiles along specified axes.

71

72

Parameters:

73

- a: array-like, input data

74

- q: float or array-like, percentile(s) to compute (0-100)

75

- axis: None or int or tuple of ints, axes to compute over

76

- out: cupy.ndarray, output array

77

- overwrite_input: bool, allow input modification

78

- method: str, interpolation method used when a percentile lies between two data points (default 'linear')

79

- keepdims: bool, keep reduced dimensions

80

81

Returns:

82

cupy.ndarray: Percentile values

83

"""

84

85

def quantile(a, q, axis=None, out=None, overwrite_input=False, method='linear', keepdims=False):

86

"""Compute quantiles along specified axes (0-1 scale)."""

87

88

def ptp(a, axis=None, out=None, keepdims=False):

89

"""Return the range of values (maximum - minimum, "peak to peak") along axes."""

90

```

91

92

### Correlation and Covariance

93

94

Statistical relationships between variables.

95

96

```python { .api }

97

def corrcoef(x, y=None, rowvar=True, bias=None, ddof=None):

98

"""

99

Return Pearson correlation coefficients.

100

101

Parameters:

102

- x: array-like, input data

103

- y: array-like, additional data

104

- rowvar: bool, rows represent variables if True

105

- bias: deprecated parameter

106

- ddof: deprecated parameter

107

108

Returns:

109

cupy.ndarray: Correlation coefficient matrix

110

"""

111

112

def cov(m, y=None, rowvar=True, bias=False, ddof=None, fweights=None, aweights=None):

113

"""

114

Estimate covariance matrix.

115

116

Parameters:

117

- m: array-like, input data

118

- y: array-like, additional data

119

- rowvar: bool, rows represent variables if True

120

- bias: bool, normalize by N if True, by N-1 if False (default)

121

- ddof: int, delta degrees of freedom

122

- fweights: array-like, frequency weights

123

- aweights: array-like, observation weights

124

125

Returns:

126

cupy.ndarray: Covariance matrix

127

"""

128

129

def correlate(a, v, mode='valid'):

130

"""

131

Cross-correlation of two 1-D sequences.

132

133

Parameters:

134

- a, v: array-like, input sequences

135

- mode: {'valid', 'same', 'full'}, output size

136

137

Returns:

138

cupy.ndarray: Cross-correlation

139

"""

140

```

141

142

### Histograms

143

144

Data distribution analysis and binning.

145

146

```python { .api }

147

def histogram(a, bins=10, range=None, normed=None, weights=None, density=None):

148

"""

149

Compute histogram of dataset.

150

151

Parameters:

152

- a: array-like, input data

153

- bins: int or array-like, number of bins or bin edges

154

- range: tuple, lower and upper range of bins

155

- normed: deprecated, use density instead

156

- weights: array-like, weights for each value

157

- density: bool, normalize to probability density

158

159

Returns:

160

tuple: (hist, bin_edges)

161

"""

162

163

def histogram2d(x, y, bins=10, range=None, normed=None, weights=None, density=None):

164

"""

165

Compute 2D histogram.

166

167

Parameters:

168

- x, y: array-like, input data

169

- bins: int or [int, int] or array-like, bin specification

170

- range: array-like, bin ranges [[xmin, xmax], [ymin, ymax]]

171

- normed: deprecated, use density instead

172

- weights: array-like, weights for each sample

173

- density: bool, normalize to probability density

174

175

Returns:

176

tuple: (H, xedges, yedges)

177

"""

178

179

def histogramdd(sample, bins=10, range=None, normed=None, weights=None, density=None):

180

"""Compute multidimensional histogram."""

181

182

def bincount(x, weights=None, minlength=0):

183

"""

184

Count occurrences of each value in array.

185

186

Parameters:

187

- x: array-like, non-negative integer array

188

- weights: array-like, weights for each value

189

- minlength: int, minimum number of bins

190

191

Returns:

192

cupy.ndarray: Counts for each value

193

"""

194

195

def digitize(x, bins, right=False):

196

"""

197

Return indices of bins to which each value belongs.

198

199

Parameters:

200

- x: array-like, input array

201

- bins: array-like, bin edges

202

- right: bool, if True each interval includes its right bin edge; if False (default) it includes its left bin edge

203

204

Returns:

205

cupy.ndarray: Bin indices

206

"""

207

```

208

209

### NaN-aware Statistics

210

211

Statistical functions that ignore NaN values when computing the result.

212

213

```python { .api }

214

def nanmean(a, axis=None, dtype=None, out=None, keepdims=False):

215

"""Compute mean ignoring NaNs."""

216

217

def nanstd(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

218

"""Compute standard deviation ignoring NaNs."""

219

220

def nanvar(a, axis=None, dtype=None, out=None, ddof=0, keepdims=False):

221

"""Compute variance ignoring NaNs."""

222

223

def nanmedian(a, axis=None, out=None, overwrite_input=False, keepdims=False):

224

"""Compute median ignoring NaNs."""

225

226

def nanmax(a, axis=None, out=None, keepdims=False):

227

"""Return maximum ignoring NaNs."""

228

229

def nanmin(a, axis=None, out=None, keepdims=False):

230

"""Return minimum ignoring NaNs."""

231

```

232

233

## Usage Examples

234

235

### Basic Statistical Analysis

236

237

```python

238

import cupy as cp

239

240

# Sample data

241

data = cp.random.normal(100, 15, size=(10000,))

242

243

# Basic statistics

244

mean_val = cp.mean(data)

245

std_val = cp.std(data)

246

var_val = cp.var(data)

247

median_val = cp.median(data)

248

249

print(f"Mean: {mean_val:.2f}, Std: {std_val:.2f}")

250

print(f"Median: {median_val:.2f}, Range: {cp.ptp(data):.2f}")

251

252

# Percentiles

253

percentiles = cp.percentile(data, [25, 50, 75, 90, 95])

254

```

255

256

### Multi-dimensional Statistics

257

258

```python

259

# Multi-dimensional data analysis

260

matrix_data = cp.random.normal(0, 1, size=(1000, 50))

261

262

# Statistics along different axes

263

col_means = cp.mean(matrix_data, axis=0) # Mean of each column

264

row_means = cp.mean(matrix_data, axis=1) # Mean of each row

265

overall_mean = cp.mean(matrix_data) # Overall mean

266

267

# Correlation analysis

268

correlation_matrix = cp.corrcoef(matrix_data.T) # 50x50 correlation matrix

269

covariance_matrix = cp.cov(matrix_data.T) # 50x50 covariance matrix

270

```

271

272

### Histogram Analysis

273

274

```python

275

# Distribution analysis

276

data = cp.random.exponential(2.0, size=100000)

277

278

# Basic histogram

279

counts, bin_edges = cp.histogram(data, bins=50, range=(0, 20))

280

281

# Probability density

282

density_counts, _ = cp.histogram(data, bins=50, range=(0, 20), density=True)

283

284

# 2D histogram for joint distributions

285

x = cp.random.normal(0, 1, 10000)

286

y = 2*x + cp.random.normal(0, 0.5, 10000)

287

H, xedges, yedges = cp.histogram2d(x, y, bins=50)

288

```