CuPy: NumPy & SciPy for GPU - A NumPy/SciPy-compatible array library for GPU-accelerated computing with Python, specifically built for CUDA 11.1
—
Comprehensive file I/O operations for loading, saving, and formatting array data. CuPy provides NumPy-compatible I/O functions for various data formats including binary files, compressed archives, text files, and custom formatting options with seamless GPU memory management.
Efficient binary file I/O for preserving exact array data with metadata and supporting single arrays or multiple arrays in compressed archives.
def save(file, arr, allow_pickle=True, fix_imports=True):
    """
    Write a single array to a binary file in NumPy ``.npy`` format.

    Parameters:
    - file: str or file-like, destination path or open file object
    - arr: array_like, array data to store
    - allow_pickle: bool, permit pickling when saving object arrays
    - fix_imports: bool, force pickle protocol 2 for Python 2 compatibility

    Notes:
    - Data is transferred to CPU (host) memory before saving
    - dtype, shape, and array metadata are preserved
    - Output is readable with numpy.load()
    """
def load(file, mmap_mode=None, allow_pickle=False, fix_imports=True, encoding='ASCII'):
    """
    Read arrays from a binary ``.npy``/``.npz`` file or a pickled file.

    Parameters:
    - file: str or file-like, input file path or file object
    - mmap_mode: {None, 'r+', 'r', 'w+', 'c'}, memory-mapping mode
    - allow_pickle: bool, allow loading pickled object arrays
    - fix_imports: bool, assume pickle protocol 2 names for Python 2 compatibility
    - encoding: str, encoding used when reading Python 2 strings

    Returns:
    - ndarray or NpzFile: loaded array data on the GPU

    Notes:
    - Loaded data is transferred to GPU memory automatically
    - Handles both .npy single-array and .npz archive formats
    - Reads files produced by numpy.save()
    """
def savez(file, *args, **kwds):
    """
    Save several arrays into a single uncompressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - *args: arrays saved under automatic names (arr_0, arr_1, ...)
    - **kwds: arrays saved under the given keyword names

    Notes:
    - Produces an .npz archive holding multiple arrays
    - Arrays are transferred to CPU memory before saving
    - Convenient for storing related datasets together
    """
def savez_compressed(file, *args, **kwds):
    """
    Save several arrays into a single compressed ``.npz`` archive.

    Parameters:
    - file: str or file-like, output file path
    - *args: arrays saved under automatic names
    - **kwds: arrays saved under the given keyword names

    Notes:
    - Same as savez() but compresses the archive for smaller files
    - Saving is slower, but disk usage is reduced
    - Recommended for long-term storage
    """
# Human-readable text format I/O for data exchange, debugging, and integration with other tools and programming languages.
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None, skiprows=0, usecols=None, unpack=False, ndmin=0, encoding='bytes', max_rows=None):
    """
    Load data from a text file where each row holds one record of array elements.

    Parameters:
    - fname: str or file-like, input file path or file object
    - dtype: data type of the result (default: float)
    - comments: str or sequence, characters that mark comment lines
    - delimiter: str, optional, field delimiter (default: any whitespace)
    - converters: dict, optional, maps column index to a conversion function
    - skiprows: int, number of lines to skip at the start of the file
    - usecols: int or sequence, which columns to read
    - unpack: bool, return one array per column instead of a single array
    - ndmin: int, minimum number of dimensions of the returned array
    - encoding: str, encoding used to decode the input file
    - max_rows: int, optional, maximum number of rows to read

    Returns:
    - ndarray: loaded data on the GPU

    Notes:
    - Parsing happens on the CPU; the result is then transferred to the GPU
    - Works with CSV and whitespace-delimited formats
    - Handles a variety of numeric formats and missing values
    """
def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='', footer='', comments='# ', encoding=None):
    """
    Save an array to a text file.

    Parameters:
    - fname: str or file-like, output file path or file object
    - X: 1D or 2D array_like, data to write
    - fmt: str or sequence, format string(s) applied to the elements
    - delimiter: str, column separator
    - newline: str, line separator
    - header: str, text written at the top of the file
    - footer: str, text written at the end of the file
    - comments: str, prefix applied to header and footer lines
    - encoding: str, encoding of the output file

    Notes:
    - The array is transferred to CPU memory before writing
    - Per-column formatting is supported via a sequence of format strings
    - Produces human-readable output suitable for external tools
    """
def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skip_header=0, skip_footer=0, converters=None, missing_values=None, filling_values=None, usecols=None, names=None, excludelist=None, deletechars=''.join(sorted("~!@#$%^&*()+={}[]|\\:;\"'<>,.?/")), defaultfmt="f%i", autostrip=False, replace_space='_', case_sensitive=True, unpack=None, ndmin=0, encoding='bytes', max_rows=None):
    """
    Load data from a text file, with enhanced handling of missing values.

    Parameters:
    - fname: str or file-like, input file path
    - dtype: data type of the result
    - comments: str, characters that mark comment lines
    - delimiter: str, field delimiter
    - skip_header: int, lines to skip at the start
    - skip_footer: int, lines to skip at the end
    - converters: dict, per-column conversion functions
    - missing_values: set, strings that represent missing data
    - filling_values: values substituted for missing data
    - usecols: sequence, which columns to read
    - names: bool or list, field names for structured arrays
    - excludelist: sequence, names to exclude
    - deletechars: str, characters stripped from field names
      (default: the sorted set of invalid identifier characters — the
      original expression double-wrapped this in ``sorted``/``join``,
      which was redundant and produced the same string)
    - defaultfmt: str, default field-name format
    - autostrip: bool, automatically strip whitespace from values
    - replace_space: str, character replacing spaces in field names
    - case_sensitive: bool, whether field names are case sensitive
    - unpack: bool, return one array per column
    - ndmin: int, minimum number of dimensions
    - encoding: str, file encoding
    - max_rows: int, maximum number of rows to read

    Returns:
    - ndarray: loaded data on the GPU

    Notes:
    - More robust than loadtxt for messy or irregular text formats
    - Supports missing values, named fields, and structured data
    """
def fromfile(file, dtype=float, count=-1, sep='', offset=0):
    """
    Construct an array from data in a text or binary file.

    Parameters:
    - file: str or file-like, input file
    - dtype: data type used to interpret the data
    - count: int, number of items to read (-1 reads everything)
    - sep: str, separator between items (empty string selects binary mode)
    - offset: int, byte offset from the start of the file

    Returns:
    - ndarray: 1D array built from the file contents

    Notes:
    - Binary mode when sep is the empty string; text mode otherwise
    - Data is transferred to the GPU after reading
    """
# Functions for seamless data transfer between CPU and GPU memory with format conversion capabilities.
def frombuffer(buffer, dtype=float, count=-1, offset=0):
    """
    Interpret a buffer as a 1D array.

    Parameters:
    - buffer: buffer_like, object exposing the buffer interface
    - dtype: data type used to interpret the bytes
    - count: int, number of items to read (-1 reads everything)
    - offset: int, start reading from this byte offset

    Returns:
    - ndarray: 1D array of the buffer data on the GPU

    Notes:
    - The buffer contents are copied into GPU memory (a CPU-side view
      cannot alias device memory)
    - Useful for interfacing with other libraries
    """
def fromstring(string, dtype=float, count=-1, sep=''):
    """
    Create an array from data encoded in a string.

    Parameters:
    - string: str, string containing the array data
    - dtype: data type used for parsing
    - count: int, number of items to read (-1 reads everything)
    - sep: str, separator between items

    Returns:
    - ndarray: 1D array parsed from the string, on the GPU

    Notes:
    - Items are whitespace-separated when sep is empty
    - A custom separator may be supplied
    - Convenient for parsing string-formatted data
    """
def fromfunction(func, shape, dtype=float, **kwargs):
    """
    Construct an array by evaluating a function over coordinate arrays.

    Parameters:
    - func: callable, evaluated over the coordinate grids
    - shape: sequence of ints, shape of the output array
    - dtype: data type of the output
    - **kwargs: extra keyword arguments forwarded to func

    Returns:
    - ndarray: array whose values are func(coordinates), on the GPU

    Notes:
    - func receives one coordinate array per output dimension
    - Handy for generating coordinate-based patterns
    - Execution happens on the GPU when possible
    """
def fromiter(iterable, dtype, count=-1):
    """
    Create an array from an iterable object.

    Parameters:
    - iterable: iterable, sequence of values
    - dtype: data type of the array elements
    - count: int, number of items to read (-1 consumes the whole iterable)

    Returns:
    - ndarray: 1D array built from the iterable, on the GPU

    Notes:
    - Efficient way to convert Python sequences
    - Data is transferred to the GPU after creation
    """
# Comprehensive formatting functions for array visualization, debugging, and custom string representations.
def array_repr(arr, max_line_width=None, precision=None, suppress_small=None):
    """
    Return the string representation (repr) of an array.

    Parameters:
    - arr: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating-point display precision
    - suppress_small: bool, print very small floats as zero

    Returns:
    - str: repr-style string that could recreate the array via eval()

    Notes:
    - Respects the active NumPy print options
    - Array data is transferred to the CPU for formatting
    """
def array_str(a, max_line_width=None, precision=None, suppress_small=None):
    """
    Return the display string (str) of an array's data.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum characters per line
    - precision: int, floating-point display precision
    - suppress_small: bool, print very small floats as zero

    Returns:
    - str: string representation of the array contents

    Notes:
    - Unlike array_repr(), does not include constructor syntax
    - Formatted for human readability
    """
def array2string(a, max_line_width=None, precision=None, suppress_small=None, separator=' ', prefix='', style=None, formatter=None, threshold=None, edgeitems=None, sign=None, floatmode=None, suffix='', legacy=None):
    """
    Return a string representation of an array with full formatting control.

    Parameters:
    - a: ndarray, input array
    - max_line_width: int, maximum line width
    - precision: int, floating-point display precision
    - suppress_small: bool, print very small floats as zero
    - separator: str, element separator
    - prefix: str, prefix for each line
    - style: callable, deprecated legacy formatting function; ignored
      (the original default referenced an undefined name ``float64``,
      which raised NameError at definition time — ``None`` is the safe
      no-op sentinel)
    - formatter: dict, custom formatters keyed by type category
    - threshold: int, total items before summarizing with "..."
    - edgeitems: int, items shown at each edge when summarizing
    - sign: str, sign printing control ('+', '-', ' ')
    - floatmode: str, floating-point format mode
    - suffix: str, suffix for each line
    - legacy: str, compatibility mode

    Returns:
    - str: formatted string representation

    Notes:
    - The most flexible of the formatting functions
    - Supports custom formatters per data type
    - Large arrays are summarized according to threshold/edgeitems
    """
def format_float_positional(x, precision=None, unique=True, fractional=True, trim='k', sign=False, pad_left=None, pad_right=None):
    """
    Format a float in positional (decimal) notation.

    Parameters:
    - x: float, value to format
    - precision: int, digits after the decimal point
    - unique: bool, use the minimum precision that round-trips uniquely
    - fractional: bool, interpret precision as fractional digits
    - trim: str, trailing-zero trimming mode ('k', '0', '.')
    - sign: bool, always show the sign
    - pad_left: int, minimum width to the left of the decimal point
    - pad_right: int, padding to the right of the decimal point

    Returns:
    - str: the formatted float string
    """
def format_float_scientific(x, precision=None, unique=True, trim='k', sign=False, pad_left=None, exp_digits=None):
    """
    Format a float in scientific notation.

    Parameters:
    - x: float, value to format
    - precision: int, digits after the decimal point
    - unique: bool, use the minimum precision that round-trips uniquely
    - trim: str, trailing-zero trimming mode
    - sign: bool, always show the sign
    - pad_left: int, minimum width to the left of the decimal point
    - exp_digits: int, minimum number of exponent digits

    Returns:
    - str: the formatted float string in scientific notation
    """
import cupy as cp
# Example: round-tripping arrays through .npy/.npz binary files.
# Create sample data
data = cp.random.random((1000, 100))
labels = cp.random.randint(0, 10, 1000)
# Save arrays to files
cp.save('data.npy', data)
cp.savez('dataset.npz', features=data, labels=labels)
cp.savez_compressed('dataset_compressed.npz', features=data, labels=labels)
# Load arrays from files
loaded_data = cp.load('data.npy')
archive = cp.load('dataset.npz')
features = archive['features']
labels = archive['labels']
print(f"Original shape: {data.shape}, Loaded shape: {loaded_data.shape}")
print(f"Data matches: {cp.allclose(data, loaded_data)}")
import cupy as cp
# Example: text-file round trip with savetxt/loadtxt, plus genfromtxt
# for mixed-type CSV data.
# Save data to text file
data = cp.array([[1.1, 2.2, 3.3],
                 [4.4, 5.5, 6.6],
                 [7.7, 8.8, 9.9]])
cp.savetxt('data.txt', data, delimiter=',', header='col1,col2,col3', fmt='%.2f')
# Load data from text file
loaded = cp.loadtxt('data.txt', delimiter=',', skiprows=1)
print(f"Text data shape: {loaded.shape}")
# Handle CSV with mixed data types using genfromtxt
# Assuming file with columns: name, age, score
mixed_data = cp.genfromtxt('mixed_data.csv',
                           delimiter=',',
                           names=True,
                           dtype=None,
                           encoding='utf-8')
import cupy as cp
# Example: array and scalar string-formatting helpers.
# Create array for formatting examples
arr = cp.array([[1.23456789, 2.87654321],
                [0.00000012, 999999.999]])
# Different representation formats
print("Default repr:")
print(cp.array_repr(arr))
print("\nCustom precision:")
print(cp.array_str(arr, precision=2))
print("\nScientific notation:")
print(cp.array2string(arr, formatter={'float': '{:.2e}'.format}))
# Format individual floats
value = 123.456789
positional = cp.format_float_positional(value, precision=2)
scientific = cp.format_float_scientific(value, precision=2)
print(f"Positional: {positional}, Scientific: {scientific}")
import cupy as cp
# Example: moving data between host (NumPy) and device (CuPy) memory,
# either directly or via files on disk.
import numpy as np
# CPU to GPU workflow
cpu_data = np.random.random((10000, 1000))
# Method 1: Direct conversion
gpu_data = cp.asarray(cpu_data)
# Method 2: Save/load (useful for large datasets)
np.save('temp_data.npy', cpu_data)
gpu_data = cp.load('temp_data.npy')
# GPU to CPU workflow
result = cp.random.random((1000, 1000))
# Method 1: Direct conversion
cpu_result = cp.asnumpy(result)
# Method 2: Save to file
cp.save('gpu_result.npy', result)
# Later load on CPU
cpu_result = np.load('gpu_result.npy')
# Verify data integrity
print(f"Data preserved: {np.allclose(cpu_data, cp.asnumpy(gpu_data))}")
import cupy as cp
import os
# Process multiple files
file_pattern = 'data_batch_*.npy'
results = []
for filename in sorted(os.glob(file_pattern)):
# Load batch
batch = cp.load(filename)
# Process on GPU
processed = cp.fft.fft2(batch)
result = cp.abs(processed).mean(axis=(1,2))
results.append(result)
# Combine results and save
final_result = cp.concatenate(results)
cp.save('processed_results.npy', final_result)
# Save processing log as text
processing_info = cp.array([len(results), final_result.shape[0], final_result.mean()])
cp.savetxt('processing_log.txt', processing_info,
header='num_batches,total_samples,mean_value',
fmt='%.6f')Install with Tessl CLI
npx tessl i tessl/pypi-cupy-cuda111