CuPy: a NumPy- and SciPy-compatible GPU-accelerated computing library for CUDA 11.2 environments.

File I/O operations for saving and loading arrays in various formats, including NumPy's binary formats (.npy, .npz) and text formats, enabling data persistence and interoperability between CuPy and NumPy.

NumPy-compatible binary file operations for efficient array storage.
def save(file, arr, allow_pickle=True, fix_imports=True):
    """Save an array to a binary file in NumPy ``.npy`` format.

    Parameters:
    - file: str or file-like, output file path or object
    - arr: array-like, array to save
    - allow_pickle: bool, allow pickling object arrays
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    """
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
    """Load an array from a ``.npy`` file.

    Parameters:
    - file: str or file-like, input file path or object
    - mmap_mode: str, memory mapping mode (None, 'r+', 'r', 'w+', 'c')
    - allow_pickle: bool, allow loading pickled object arrays
    - fix_imports: bool, fix Python 2/3 pickle compatibility
    - encoding: str, encoding used for Python 2 compatibility

    Returns:
    - cupy.ndarray, loaded array
    """
def savez(file, *args, **kwds):
    """Save multiple arrays into a single uncompressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays saved under auto-generated names
    - kwds: arrays saved under the given keyword names
    """
def savez_compressed(file, *args, **kwds):
    """Save multiple arrays into a single compressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - args: arrays saved under auto-generated names
    - kwds: arrays saved under the given keyword names
    """


# Text-based file operations for human-readable array storage.
# Fixed: the default line separator is the actual newline character '\n',
# not the two-character literal backslash-n that the mangled source showed.
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
    """Save an array to a text file.

    Parameters:
    - fname: str or file-like, output file name or object
    - X: array-like, 1-D or 2-D array to save
    - fmt: str or sequence, format string(s) for numbers
    - delimiter: str, column separator
    - newline: str, line separator
    - header: str, text written at the top of the file
    - footer: str, text written at the end of the file
    - comments: str, prefix prepended to header/footer lines
    - encoding: str, text encoding
    """
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
    """Load data from a text file.

    Parameters:
    - fname: str or file-like, input file name or object
    - dtype: data type, output array type
    - comments: str or sequence, comment prefixes
    - delimiter: str, column separator
    - converters: dict, per-column converter functions
    - skiprows: int, number of leading rows to skip
    - usecols: int or sequence, columns to read
    - unpack: bool, unpack columns into separate arrays
    - ndmin: int, minimum number of dimensions of the result
    - max_rows: int, maximum number of rows to read
    - encoding: str, text encoding

    Returns:
    - cupy.ndarray, loaded array
    """
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=None, defaultfmt='f%i', autostrip=False, replace_space='_', case_sensitive=True, unpack=None, invalid_raise=True, max_rows=None, encoding='bytes'):
    """Load data from a text file, with handling for missing values.

    Parameters:
    - fname: str or file-like, input file
    - dtype: data type, output type
    - comments: str, comment prefix
    - delimiter: str, column separator
    - skip_header: int, header lines to skip
    - skip_footer: int, footer lines to skip
    - converters: dict, per-column converter functions
    - missing_values: str or dict, markers that denote missing data
    - filling_values: scalar or dict, values substituted for missing data
    - usecols: sequence, columns to use
    - names: bool or sequence, field names
    - excludelist: sequence, names to exclude
    - deletechars: str, characters stripped from field names
    - defaultfmt: str, format used for auto-generated field names
    - autostrip: bool, automatically strip whitespace from values
    - replace_space: str, replacement for spaces in field names
    - case_sensitive: bool, whether field names are case sensitive
    - unpack: bool, unpack columns into separate arrays
    - invalid_raise: bool, raise on rows with an inconsistent column count
    - max_rows: int, maximum number of rows to read
    - encoding: str, text encoding

    Returns:
    - cupy.ndarray, loaded array; has a structured dtype if ``names`` is given
    """


# Functions for converting arrays to string representations.
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
    """Return the string representation of an array (``repr`` style).

    Parameters:
    - arr: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero

    Returns:
    - str, string representation
    """
def array_str(a, max_line_width=None, precision=None, suppress_small=None):
    """Return a string representation of the data in an array.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero

    Returns:
    - str, string representation of the array data
    """
# Fixed: the original source fused "import cupy as cp" onto the docstring's
# closing line, breaking the syntax; that import belongs to the example
# section that follows and is restored there.
def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix="", style=repr, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix="", **kwarg):
    """Return a string representation of an array with full formatting control.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating point precision
    - suppress_small: bool, print numbers close to zero as zero
    - separator: str, element separator
    - prefix: str, prefix string
    - style: callable, formatting function
    - formatter: dict, custom per-type formatters
    - threshold: int, total element count that triggers summarization
    - edgeitems: int, items shown at each edge when summarized
    - sign: str, sign handling ('-', '+', ' ')
    - floatmode: str, float formatting mode
    - suffix: str, suffix string

    Returns:
    - str, formatted string representation
    """
# Example: saving and loading arrays in NumPy binary formats.
import cupy as cp
import numpy as np

# Create test data.
data = cp.random.random((1000, 1000))
labels = cp.arange(1000)
# CuPy does not support string dtypes, so keep text metadata in a NumPy array.
metadata = np.array(['sample_' + str(i) for i in range(100)])

# Save a single array to a .npy file.
cp.save('data.npy', data)

# Load an array from a .npy file.
loaded_data = cp.load('data.npy')
print(f"Original shape: {data.shape}, Loaded shape: {loaded_data.shape}")

# Save multiple arrays to an uncompressed .npz file.
cp.savez('dataset.npz',
         features=data,
         labels=labels,
         metadata=metadata)

# Save with compression.
cp.savez_compressed('dataset_compressed.npz',
                    features=data,
                    labels=labels)

# Load from the .npz file.
npz_file = cp.load('dataset.npz')
loaded_features = npz_file['features']
loaded_labels = npz_file['labels']
npz_file.close()  # good practice to close the archive handle
# Example: text-based saving and loading with savetxt/loadtxt.
import cupy as cp

# Create sample data.
measurements = cp.random.normal(100, 15, (50, 3))
timestamps = cp.arange(50)

# Save to a text file with custom formatting.
cp.savetxt('measurements.txt',
           measurements,
           fmt='%.2f',
           delimiter=',',
           header='Temperature,Humidity,Pressure',
           comments='')

# Save with per-column formats.
combined_data = cp.column_stack([timestamps, measurements])
cp.savetxt('timestamped_data.csv',
           combined_data,
           fmt=['%d', '%.2f', '%.2f', '%.2f'],
           delimiter=',',
           header='Timestamp,Temperature,Humidity,Pressure',
           comments='')

# Load text data, skipping the header row.
loaded_measurements = cp.loadtxt('measurements.txt', delimiter=',', skiprows=1)
print(f"Loaded data shape: {loaded_measurements.shape}")

# Load selected columns only.
temp_humidity = cp.loadtxt('measurements.txt',
                           delimiter=',',
                           skiprows=1,
                           usecols=(0, 1))

# Load and unpack columns into separate arrays.
temp, humidity, pressure = cp.loadtxt('measurements.txt',
                                      delimiter=',',
                                      skiprows=1,
                                      unpack=True)
# Example: handling missing values with genfromtxt.
import cupy as cp

# Create data with missing values (simulated with NaN).
data_with_missing = cp.random.random((20, 4))
data_with_missing[5:8, 1] = cp.nan
data_with_missing[12:15, 2] = cp.nan

# Save the data; '%.6f' writes the NaNs as the text 'nan'.
cp.savetxt('data_with_missing.txt', data_with_missing, fmt='%.6f')

# Load with missing-value handling using genfromtxt.
loaded_with_missing = cp.genfromtxt('data_with_missing.txt',
                                    missing_values='nan',
                                    filling_values=-999.0)
print(f"Missing values filled with -999: {cp.sum(loaded_with_missing == -999.0)}")

# Load structured data with field names taken from the header row.
# NOTE(review): dtype=None with names=True yields a structured dtype -- confirm CuPy support.
structured_data = cp.genfromtxt('timestamped_data.csv',
                                delimiter=',',
                                names=True,
                                dtype=None,
                                encoding='utf-8')
# Example: interoperability between CuPy and NumPy binary files.
import cupy as cp
import numpy as np

# Create a CuPy array.
gpu_data = cp.random.random((500, 500))

# Save the CuPy array (the data is transferred to the CPU automatically).
cp.save('gpu_data.npy', gpu_data)

# Load into NumPy.
numpy_data = np.load('gpu_data.npy')
print(f"NumPy loaded data type: {type(numpy_data)}")

# Load back into CuPy.
cupy_data = cp.load('gpu_data.npy')
print(f"CuPy loaded data type: {type(cupy_data)}")

# Cross-platform compatibility: save from CuPy, load with NumPy.
cp.savez('cross_platform.npz',
         array1=cp.ones((100, 100)),
         array2=cp.zeros((50, 50)))

# Load with NumPy.
np_loaded = np.load('cross_platform.npz')
np_array1 = np_loaded['array1']
print(f"NumPy can load CuPy-saved data: {np_array1.shape}")

# Save with NumPy, load with CuPy.
np.save('numpy_saved.npy', np.random.random((200, 200)))
cp_loaded = cp.load('numpy_saved.npy')
print(f"CuPy can load NumPy-saved data: {cp_loaded.shape}")
# Example: chunked GPU processing of a memory-mapped file.
import cupy as cp
import numpy as np

# Create a large dataset with NumPy so the file can be memory mapped.
large_data = np.random.random((10000, 1000)).astype(np.float32)
np.save('large_dataset.npy', large_data)

# Memory map the file read-only.
# NOTE: cupy.load does not support mmap_mode, so NumPy does the mapping.
mmapped_data = np.load('large_dataset.npy', mmap_mode='r')

# Process fixed-size chunks on the GPU.
chunk_size = 1000
for i in range(0, len(mmapped_data), chunk_size):
    chunk = mmapped_data[i:i + chunk_size]
    # Transfer the chunk to the GPU.
    gpu_chunk = cp.asarray(chunk)
    # Process on the GPU.
    processed = cp.sqrt(gpu_chunk + 1.0)
    # Bring the result back to the host if needed.
    result = cp.asnumpy(processed)
    print(f"Processed chunk {i // chunk_size + 1}/{len(mmapped_data) // chunk_size}")
# Example: controlling array-to-string conversion.
import cupy as cp

# Create test arrays.
small_array = cp.array([[1.23456789, 2.34567890],
                        [3.45678901, 4.56789012]])
large_array = cp.random.random((100, 100))

# Control string representation.
print("Default representation:")
print(cp.array_str(small_array))
print("\nCustom precision:")
print(cp.array_str(small_array, precision=2))
print("\nCustom representation:")
print(cp.array_repr(small_array, precision=3, suppress_small=True))

# Full control with array2string.
custom_repr = cp.array2string(small_array,
                              precision=4,
                              separator=', ',
                              prefix='Array: ',
                              suffix=' [end]')
print(f"\nCustom format: {custom_repr}")

# Summarize large arrays. threshold/edgeitems are array2string options,
# not array_str options (see the array_str signature), so use array2string.
print("\nLarge array summary:")
print(cp.array2string(large_array, threshold=10, edgeitems=2))
# Example: comparing on-disk sizes of different save formats.
import cupy as cp
import os

# Create test data of different dtypes.
float_data = cp.random.random((1000, 1000)).astype(cp.float32)
int_data = cp.random.randint(0, 100, (1000, 1000), dtype=cp.int32)
bool_data = cp.random.random((1000, 1000)) > 0.5

# Save in each format, then compare resulting file sizes.
formats = {
    'uncompressed_npz': lambda: cp.savez('test_uncompressed.npz',
                                         f=float_data, i=int_data, b=bool_data),
    'compressed_npz': lambda: cp.savez_compressed('test_compressed.npz',
                                                  f=float_data, i=int_data, b=bool_data),
    'individual_npy': lambda: [cp.save(f'test_{t}.npy', d)
                               for t, d in [('float', float_data),
                                            ('int', int_data),
                                            ('bool', bool_data)]],
}

for format_name, save_func in formats.items():
    save_func()
    if format_name == 'individual_npy':
        total_size = sum(os.path.getsize(f'test_{t}.npy')
                         for t in ['float', 'int', 'bool'])
        print(f"{format_name}: {total_size / 1024 / 1024:.2f} MB")
    else:
        # 'uncompressed_npz' -> test_uncompressed.npz; 'compressed_npz' -> test_compressed.npz
        filename = f"test_{format_name.split('_')[0]}.npz"
        size = os.path.getsize(filename)
        print(f"{format_name}: {size / 1024 / 1024:.2f} MB")

# Install with Tessl CLI
#   npx tessl i tessl/pypi-cupy-cuda112