High-performance array manipulation functions for remapping, masking, renumbering, and transposing 3D labeled images and point clouds.
---
Efficient binary serialization of chunked arrays for storage and network transfer. This functionality provides an optimized method for converting array data to binary format in grid-aligned chunks, essential for large-scale data processing and distributed computing workflows.

tobytes computes the binary representation of an image divided into a grid of cutouts, with optimized performance for specific memory layouts.
```python
def tobytes(
    image: NDArray,
    chunk_size: tuple[int, int, int],
    order: str = "C"
) -> list[bytes]:
    """
    Compute bytes with the image divided into a grid of cutouts.

    Args:
        image: Input image array
        chunk_size: Size of each chunk (x, y, z)
        order: Memory order ("C" or "F", default: "C")

    Returns:
        Resultant binaries indexed by gridpoint in Fortran order
    """
```

Usage Example:

```python
import fastremap
import numpy as np
# Create a sample 3D image
image = np.random.randint(0, 255, size=(128, 128, 64), dtype=np.uint8)
# Divide into 64x64x64 chunks and serialize
chunk_size = (64, 64, 64)
binaries = fastremap.tobytes(image, chunk_size, order="C")
# Result is a list of bytes objects
print(f"Number of chunks: {len(binaries)}")
print(f"First chunk size: {len(binaries[0])} bytes")
# For Fortran-ordered output
binaries_f = fastremap.tobytes(image, chunk_size, order="F")
```
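The returned list is flat, so downstream code often needs to recover each chunk's gridpoint. Here is a minimal sketch continuing the usage example above; the Fortran gridpoint ordering (x varying fastest) comes from the docstring, while the ceil division for the grid dimensions is an assumption for shapes that do not divide evenly by the chunk size:

```python
# Grid dimensions; ceil division is an assumption for shapes that are
# not exact multiples of the chunk size.
gw, gh, gd = (-(-s // c) for s, c in zip(image.shape, chunk_size))

# Per the docstring, binaries are indexed by gridpoint in Fortran
# order, i.e. the x gridpoint varies fastest.
for i, chunk in enumerate(binaries):
    gx = i % gw
    gy = (i // gw) % gh
    gz = i // (gw * gh)
    print(f"chunk {i} -> gridpoint ({gx}, {gy}, {gz}), {len(chunk)} bytes")
```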
The tobytes function is significantly optimized for specific conditions: when the requested output order matches the array's memory layout (C order for C-contiguous input, F order for F-contiguous input; see the memory order examples below), it is much faster than cutting out each chunk and calling tobytes on each chunk individually.

Performance Example:

```python
import fastremap
import numpy as np
import time
# Large Fortran-ordered image
large_image = np.random.random((512, 512, 256)).astype(np.float32, order='F')
chunk_size = (64, 64, 64)
# Optimized fastremap approach
start = time.time()
fast_chunks = fastremap.tobytes(large_image, chunk_size, order="F")
fast_time = time.time() - start
# Manual chunking approach (for comparison)
start = time.time()
manual_chunks = []
for z in range(0, 256, 64):
    for y in range(0, 512, 64):
        for x in range(0, 512, 64):
            chunk = large_image[x:x+64, y:y+64, z:z+64]
            manual_chunks.append(chunk.tobytes(order='F'))
manual_time = time.time() - start
print(f"fastremap time: {fast_time:.3f}s")
print(f"Manual time: {manual_time:.3f}s")
print(f"Speedup: {manual_time/fast_time:.1f}x faster")import fastremap
Distributed Processing Example:

```python
import fastremap
import numpy as np
# Prepare large dataset for distributed processing
dataset = np.random.random((1024, 1024, 512)).astype(np.float32)
# Chunk into manageable pieces for worker nodes
chunk_size = (128, 128, 128)
chunks = fastremap.tobytes(dataset, chunk_size, order="C")
# Each chunk can now be sent to different worker processes
for i, chunk_data in enumerate(chunks):
    # Send chunk_data to worker i
    # worker_pool.submit(process_chunk, chunk_data, i)
    pass
```
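A minimal sketch of what a worker might do with each binary chunk; `process_chunk` is a hypothetical helper, and the dtype and chunk shape are assumptions that must be communicated out of band since the raw bytes carry no header (sizes here are a scaled-down version of the example above):

```python
import numpy as np
import fastremap
from concurrent.futures import ProcessPoolExecutor

def process_chunk(chunk_data: bytes, index: int) -> float:
    # Hypothetical worker: rebuild the array from raw bytes. The dtype
    # and shape are assumptions shared out of band with the producer.
    chunk = np.frombuffer(chunk_data, dtype=np.float32).reshape((128, 128, 128))
    return float(chunk.mean())

if __name__ == "__main__":
    dataset = np.random.random((256, 256, 128)).astype(np.float32)
    chunks = fastremap.tobytes(dataset, (128, 128, 128), order="C")
    # Fan the serialized chunks out to a process pool.
    with ProcessPoolExecutor() as pool:
        results = list(pool.map(process_chunk, chunks, range(len(chunks))))
    print(f"Processed {len(results)} chunks")
```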
Storage Example:

```python
import fastremap
import numpy as np
import pickle
# Large scientific dataset
data = np.random.random((2048, 2048, 1024)).astype(np.float32)
# Chunk and serialize for efficient storage
chunk_size = (256, 256, 256)
serialized_chunks = fastremap.tobytes(data, chunk_size, order="F")
# Store chunks efficiently
metadata = {
    'original_shape': data.shape,
    'chunk_size': chunk_size,
    'dtype': data.dtype,
    'order': 'F',
    'num_chunks': len(serialized_chunks)
}
# Save metadata and chunks
with open('data_metadata.pkl', 'wb') as f:
    pickle.dump(metadata, f)

for i, chunk in enumerate(serialized_chunks):
    with open(f'chunk_{i:04d}.bin', 'wb') as f:
        f.write(chunk)
```
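To read the data back, the chunk files can be reassembled using the Fortran gridpoint indexing described in the docstring. A hedged sketch, assuming the shape divides evenly by the chunk size (true for the example above) and enough memory to hold the full array:

```python
import pickle
import numpy as np

with open('data_metadata.pkl', 'rb') as f:
    meta = pickle.load(f)

cx, cy, cz = meta['chunk_size']
sx, sy, sz = meta['original_shape']
gw, gh = sx // cx, sy // cy  # assumes an evenly divisible shape

restored = np.zeros(meta['original_shape'], dtype=meta['dtype'], order=meta['order'])
for i in range(meta['num_chunks']):
    with open(f'chunk_{i:04d}.bin', 'rb') as f:
        flat = np.frombuffer(f.read(), dtype=meta['dtype'])
    chunk = flat.reshape((cx, cy, cz), order=meta['order'])
    # Gridpoints are indexed in Fortran order: x varies fastest.
    gx = i % gw
    gy = (i // gw) % gh
    gz = i // (gw * gh)
    restored[gx*cx:(gx+1)*cx, gy*cy:(gy+1)*cy, gz*cz:(gz+1)*cz] = chunk
```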
Memory Order Example:

```python
import fastremap
import numpy as np

# For C-contiguous arrays, use C order for best performance
c_array = np.random.random((100, 200, 300)).astype(np.float32, order='C')
c_chunks = fastremap.tobytes(c_array, (50, 50, 50), order="C") # Optimal
# For F-contiguous arrays, use F order for best performance
f_array = np.random.random((100, 200, 300)).astype(np.float32, order='F')
f_chunks = fastremap.tobytes(f_array, (50, 50, 50), order="F") # Optimal
# Mixed orders work but may be slower
mixed_chunks = fastremap.tobytes(c_array, (50, 50, 50), order="F")  # Suboptimal
```
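When an array will be serialized in F order more than once, converting it up front with NumPy's standard `np.asfortranarray` (a one-time full copy; an approach not shown in the examples above) lets each call use the matching-layout fast path:

```python
import numpy as np
import fastremap

c_array = np.random.random((100, 200, 300)).astype(np.float32)

# One-time copy into Fortran order; worthwhile only if the array is
# serialized repeatedly or the copy cost is acceptable.
f_array = np.asfortranarray(c_array)
f_chunks = fastremap.tobytes(f_array, (50, 50, 50), order="F")
```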
Type alias used in the tobytes signature: `NDArray = np.ndarray`.

Install with Tessl CLI:

```sh
npx tessl i tessl/pypi-fastremap
```