or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

array-operations.mdcuda-interface.mdfft-operations.mdindex.mdinput-output.mdlinear-algebra.mdmath-operations.mdrandom-generation.mdscipy-extensions.md

docs/input-output.md

0

# Input/Output Operations

1

2

File I/O operations for saving and loading arrays in various formats including NumPy's binary formats (.npy, .npz) and text formats, enabling data persistence and interoperability between CuPy and NumPy.

3

4

## Capabilities

5

6

### Binary File I/O

7

8

NumPy-compatible binary file operations for efficient array storage.

9

10

```python { .api }

11

def save(file, arr, allow_pickle=True, fix_imports=True):

12

"""

13

Save array to binary file in NumPy .npy format.

14

15

Parameters:

16

- file: str or file-like, output file path or object

17

- arr: array-like, array to save

18

- allow_pickle: bool, allow pickling object arrays

19

- fix_imports: bool, fix Python 2/3 pickle compatibility

20

"""

21

22

def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):

23

"""

24

Load array from .npy file.

25

26

Parameters:

27

- file: str or file-like, input file path or object

28

- mmap_mode: str, memory mapping mode (None, 'r+', 'r', 'w+', 'c')

29

- allow_pickle: bool, allow loading pickled objects

30

- fix_imports: bool, fix Python 2/3 pickle compatibility

31

- encoding: str, encoding for Python 2 compatibility

32

33

Returns:

34

cupy.ndarray, loaded array

35

"""

36

37

def savez(file, *args, **kwds):

38

"""

39

Save multiple arrays in uncompressed .npz format.

40

41

Parameters:

42

- file: str or file-like, output file path

43

- args: arrays to save with auto-generated names

44

- kwds: arrays to save with specified names

45

"""

46

47

def savez_compressed(file, *args, **kwds):

48

"""

49

Save multiple arrays in compressed .npz format.

50

51

Parameters:

52

- file: str or file-like, output file path

53

- args: arrays to save with auto-generated names

54

- kwds: arrays to save with specified names

55

"""

56

```

57

58

### Text File I/O

59

60

Text-based file operations for human-readable array storage.

61

62

```python { .api }

63

def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):

64

"""

65

Save array to text file.

66

67

Parameters:

68

- fname: str or file-like, output file name or object

69

- X: array-like, 1-D or 2-D array to save

70

- fmt: str or sequence, format string for numbers

71

- delimiter: str, column separator

72

- newline: str, line separator

73

- header: str, header text

74

- footer: str, footer text

75

- comments: str, comment prefix for header/footer

76

- encoding: str, text encoding

77

"""

78

79

def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):

80

"""

81

Load data from text file.

82

83

Parameters:

84

- fname: str or file-like, input file name or object

85

- dtype: data type, output array type

86

- comments: str or sequence, comment prefixes

87

- delimiter: str, column separator

88

- converters: dict, column converters

89

- skiprows: int, number of rows to skip

90

- usecols: int or sequence, columns to read

91

- unpack: bool, unpack columns into separate arrays

92

- ndmin: int, minimum dimensions

93

- encoding: str, text encoding

94

- max_rows: int, maximum rows to read

95

96

Returns:

97

cupy.ndarray, loaded array

98

"""

99

100

def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt='f%i', autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):

101

"""

102

Load data from text file with missing values handling.

103

104

Parameters:

105

- fname: str or file-like, input file

106

- dtype: data type, output type

107

- comments: str, comment prefix

108

- delimiter: str, column separator

109

- skip_header: int, header lines to skip

110

- skip_footer: int, footer lines to skip

111

- converters: dict, column converters

112

- missing_values: str or dict, missing value indicators

113

- filling_values: scalar or dict, fill values for missing data

114

- usecols: sequence, columns to use

115

- names: bool or sequence, field names

116

- excludelist: sequence, names to exclude

117

- deletechars: str, characters to remove from names

118

- defaultfmt: str, default format for names

119

- autostrip: bool, automatically strip whitespace

120

- replace_space: str, replacement for spaces in names

121

- case_sensitive: bool, case sensitive field names

122

- unpack: bool, unpack to separate arrays

123

- invalid_raise: bool, raise on invalid values

124

- max_rows: int, maximum rows to read

125

- encoding: str, text encoding

126

127

Returns:

128

cupy.ndarray, loaded array with structured dtype if names specified

129

"""

130

```

131

132

### Array String Representation

133

134

Functions for converting arrays to string representations.

135

136

```python { .api }

137

def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):

138

"""

139

Return string representation of array.

140

141

Parameters:

142

- arr: ndarray, input array

143

- max_line_width: int, maximum line width

144

- precision: int, floating point precision

145

- suppress_small: bool, suppress small values

146

147

Returns:

148

str, string representation

149

"""

150

151

def array_str(a, max_line_width=None, precision=None, suppress_small=None):

152

"""

153

Return string representation of array data.

154

155

Parameters:

156

- a: ndarray, input array

157

- max_line_width: int, maximum line width

158

- precision: int, floating point precision

159

- suppress_small: bool, suppress small values

160

161

Returns:

162

str, string representation of array data

163

"""

164

165

def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", **kwarg):

166

"""

167

Return string representation of array with full control.

168

169

Parameters:

170

- a: ndarray, input array

171

- max_line_width: int, maximum characters per line

172

- precision: int, floating point precision

173

- suppress_small: bool, suppress small values

174

- separator: str, element separator

175

- prefix: str, prefix string

176

- style: callable, formatting function

177

- formatter: dict, custom formatters

178

- threshold: int, threshold for summarization

179

- edgeitems: int, items at edges in summary

180

- sign: str, sign handling ('-', '+', ' ')

181

- floatmode: str, float format mode

182

- suffix: str, suffix string

183

184

Returns:

185

str, formatted string representation

186

"""

187

```

188

189

## Usage Examples

190

191

### Basic File I/O Operations

192

193

```python

194

import cupy as cp

195

import numpy as np

196

197

# Create test data

198

data = cp.random.random((1000, 1000))

199

labels = cp.arange(1000)

200

metadata = np.array(['sample_' + str(i) for i in range(1000)])  # NumPy array: CuPy does not support string dtypes

201

202

# Save single array to .npy file

203

cp.save('data.npy', data)

204

205

# Load array from .npy file

206

loaded_data = cp.load('data.npy')

207

print(f"Original shape: {data.shape}, Loaded shape: {loaded_data.shape}")

208

209

# Save multiple arrays to .npz file

210

cp.savez('dataset.npz',

211

features=data,

212

labels=labels,

213

metadata=metadata)

214

215

# Save with compression

216

cp.savez_compressed('dataset_compressed.npz',

217

features=data,

218

labels=labels)

219

220

# Load from .npz file

221

npz_file = cp.load('dataset.npz')

222

loaded_features = npz_file['features']

223

loaded_labels = npz_file['labels']

224

npz_file.close() # Good practice to close

225

```

226

227

### Text File Operations

228

229

```python

230

import cupy as cp

231

232

# Create sample data

233

measurements = cp.random.normal(100, 15, (50, 3))

234

timestamps = cp.arange(50)

235

236

# Save to text file with custom formatting

237

cp.savetxt('measurements.txt',

238

measurements,

239

fmt='%.2f',

240

delimiter=',',

241

header='Temperature,Humidity,Pressure',

242

comments='')

243

244

# Save with more complex formatting

245

combined_data = cp.column_stack([timestamps, measurements])

246

cp.savetxt('timestamped_data.csv',

247

combined_data,

248

fmt=['%d', '%.2f', '%.2f', '%.2f'],

249

delimiter=',',

250

header='Timestamp,Temperature,Humidity,Pressure',

251

comments='')

252

253

# Load text data

254

loaded_measurements = cp.loadtxt('measurements.txt', delimiter=',', skiprows=1)

255

print(f"Loaded data shape: {loaded_measurements.shape}")

256

257

# Load with column selection

258

temp_humidity = cp.loadtxt('measurements.txt',

259

delimiter=',',

260

skiprows=1,

261

usecols=(0, 1))

262

263

# Load and unpack columns

264

temp, humidity, pressure = cp.loadtxt('measurements.txt',

265

delimiter=',',

266

skiprows=1,

267

unpack=True)

268

```

269

270

### Advanced Text Processing

271

272

```python

273

import cupy as cp

274

275

# Create data with missing values (simulate by saving with NaN)

276

data_with_missing = cp.random.random((20, 4))

277

data_with_missing[5:8, 1] = cp.nan

278

data_with_missing[12:15, 2] = cp.nan

279

280

# Save data

281

cp.savetxt('data_with_missing.txt', data_with_missing, fmt='%.6f')

282

283

# Load with missing value handling using genfromtxt

284

loaded_with_missing = cp.genfromtxt('data_with_missing.txt',

285

missing_values='nan',

286

filling_values=-999.0)

287

288

print(f"Missing values filled with -999: {cp.sum(loaded_with_missing == -999.0)}")

289

290

# Load structured data with field names

291

structured_data = cp.genfromtxt('timestamped_data.csv',

292

delimiter=',',

293

names=True,

294

dtype=None,

295

encoding='utf-8')

296

```

297

298

### Interoperability with NumPy

299

300

```python

301

import cupy as cp

302

import numpy as np

303

304

# Create CuPy array

305

gpu_data = cp.random.random((500, 500))

306

307

# Save CuPy array (automatically transfers to CPU)

308

cp.save('gpu_data.npy', gpu_data)

309

310

# Load into NumPy

311

numpy_data = np.load('gpu_data.npy')

312

print(f"NumPy loaded data type: {type(numpy_data)}")

313

314

# Load back into CuPy

315

cupy_data = cp.load('gpu_data.npy')

316

print(f"CuPy loaded data type: {type(cupy_data)}")

317

318

# Cross-platform compatibility

319

# Save from CuPy, load with NumPy

320

cp.savez('cross_platform.npz',

321

array1=cp.ones((100, 100)),

322

array2=cp.zeros((50, 50)))

323

324

# Load with NumPy

325

np_loaded = np.load('cross_platform.npz')

326

np_array1 = np_loaded['array1']

327

print(f"NumPy can load CuPy-saved data: {np_array1.shape}")

328

329

# Convert and save with NumPy, load with CuPy

330

np.save('numpy_saved.npy', np.random.random((200, 200)))

331

cp_loaded = cp.load('numpy_saved.npy')

332

print(f"CuPy can load NumPy-saved data: {cp_loaded.shape}")

333

```

334

335

### Memory Mapping for Large Files

336

337

```python

338

import cupy as cp

339

import numpy as np

340

341

# Create large dataset (using NumPy for memory mapping)

342

large_data = np.random.random((10000, 1000)).astype(np.float32)

343

np.save('large_dataset.npy', large_data)

344

345

# Memory map the file (read-only)

346

# Note: CuPy load doesn't support mmap_mode, so we use NumPy for mapping

347

mmapped_data = np.load('large_dataset.npy', mmap_mode='r')

348

349

# Process chunks with CuPy

350

chunk_size = 1000

351

for i in range(0, len(mmapped_data), chunk_size):

352

chunk = mmapped_data[i:i+chunk_size]

353

354

# Transfer chunk to GPU

355

gpu_chunk = cp.asarray(chunk)

356

357

# Process on GPU

358

processed = cp.sqrt(gpu_chunk + 1.0)

359

360

# Get result back if needed

361

result = cp.asnumpy(processed)

362

363

# Process or save result

364

print(f"Processed chunk {i//chunk_size + 1}/{len(mmapped_data)//chunk_size}")

365

```

366

367

### Custom Array Formatting

368

369

```python

370

import cupy as cp

371

372

# Create test arrays

373

small_array = cp.array([[1.23456789, 2.34567890],

374

[3.45678901, 4.56789012]])

375

376

large_array = cp.random.random((100, 100))

377

378

# Control string representation

379

print("Default representation:")

380

print(cp.array_str(small_array))

381

382

print("\nCustom precision:")

383

print(cp.array_str(small_array, precision=2))

384

385

print("\nCustom representation:")

386

print(cp.array_repr(small_array, precision=3, suppress_small=True))

387

388

# Full control with array2string

389

custom_repr = cp.array2string(small_array,

390

precision=4,

391

separator=', ',

392

prefix='Array: ',

393

suffix=' [end]')

394

print(f"\nCustom format: {custom_repr}")

395

396

# Threshold for large arrays

397

print("\nLarge array summary:")

398

print(cp.array2string(large_array, threshold=10, edgeitems=2))

399

```

400

401

### File Format Considerations

402

403

```python

404

import cupy as cp

405

import os

406

407

# Create test data of different types

408

float_data = cp.random.random((1000, 1000)).astype(cp.float32)

409

int_data = cp.random.randint(0, 100, (1000, 1000), dtype=cp.int32)

410

bool_data = cp.random.random((1000, 1000)) > 0.5

411

412

# Save in different formats and compare file sizes

413

formats = {

414

'uncompressed_npz': lambda: cp.savez('test_uncompressed.npz',

415

f=float_data, i=int_data, b=bool_data),

416

'compressed_npz': lambda: cp.savez_compressed('test_compressed.npz',

417

f=float_data, i=int_data, b=bool_data),

418

'individual_npy': lambda: [cp.save(f'test_{t}.npy', d)

419

for t, d in [('float', float_data),

420

('int', int_data),

421

('bool', bool_data)]]

422

}

423

424

for format_name, save_func in formats.items():

425

save_func()

426

427

if format_name == 'individual_npy':

428

total_size = sum(os.path.getsize(f'test_{t}.npy')

429

for t in ['float', 'int', 'bool'])

430

print(f"{format_name}: {total_size / 1024 / 1024:.2f} MB")

431

else:

432

filename = f"test_{format_name.split('_')[0]}.npz"

433

size = os.path.getsize(filename)

434

print(f"{format_name}: {size / 1024 / 1024:.2f} MB")

435

```