Fast Base64 encoding/decoding library with SIMD optimizations
npx @tessl/cli install tessl/pypi-pybase64@1.4.0

Fast Base64 encoding/decoding library that provides a high-performance wrapper around the optimized libbase64 C library. PyBase64 offers the same API as Python's built-in base64 module for easy integration while delivering significantly faster performance through SIMD optimizations (AVX2, AVX512-VBMI, Neon) and native C implementations.
pip install pybase64

Command-line tool: `pybase64` command or `python -m pybase64`

import pybase64

For specific functions:
from pybase64 import b64encode, b64decode, standard_b64encode, urlsafe_b64decode

import pybase64
# Basic encoding/decoding
data = b'Hello, World!'
encoded = pybase64.b64encode(data)
decoded = pybase64.b64decode(encoded)
print(encoded) # b'SGVsbG8sIFdvcmxkIQ=='
print(decoded) # b'Hello, World!'
# URL-safe encoding
url_encoded = pybase64.urlsafe_b64encode(data)
url_decoded = pybase64.urlsafe_b64decode(url_encoded)
# Custom alphabet
custom_encoded = pybase64.b64encode(data, altchars=b'_:')
custom_decoded = pybase64.b64decode(custom_encoded, altchars=b'_:')
# Validation for security-critical applications
secure_decoded = pybase64.b64decode(encoded, validate=True)
# Version and performance info
print(pybase64.get_version()) # Shows SIMD optimizations in use

PyBase64 provides a dual-implementation architecture for optimal performance:
- C extension (`_pybase64`): High-performance implementation using libbase64 with SIMD optimizations
- Python fallback (`_fallback`): Pure Python implementation using built-in base64 module when C extension unavailable

This design ensures maximum performance when possible while maintaining compatibility across all Python environments including PyPy and free-threaded builds.

Primary Base64 encoding functions with full alphabet customization and optimal performance through C extensions.
Primary Base64 encoding functions with full alphabet customization and optimal performance through C extensions.
def b64encode(s: Buffer, altchars: str | Buffer | None = None) -> bytes:
"""
Encode bytes using Base64 alphabet.
Parameters:
- s: bytes-like object to encode
- altchars: optional 2-character string/bytes for custom alphabet (replaces '+' and '/')
Returns:
bytes: Base64 encoded data
Raises:
BufferError: if buffer is not C-contiguous
TypeError: for invalid input types
ValueError: for non-ASCII strings in altchars
"""
def b64encode_as_string(s: Buffer, altchars: str | Buffer | None = None) -> str:
"""
Encode bytes using Base64 alphabet, return as string.
Parameters:
- s: bytes-like object to encode
- altchars: optional 2-character string/bytes for custom alphabet
Returns:
str: Base64 encoded data as ASCII string
"""
def encodebytes(s: Buffer) -> bytes:
"""
Encode bytes with MIME-style line breaks every 76 characters.
Parameters:
- s: bytes-like object to encode
Returns:
bytes: Base64 encoded data with newlines per RFC 2045 (MIME)
"""

Base64 decoding functions with validation options and alternative alphabet support for maximum security and flexibility.
def b64decode(s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False) -> bytes:
"""
Decode Base64 encoded data.
Parameters:
- s: string or bytes-like object to decode
- altchars: optional 2-character alternative alphabet
- validate: if True, strictly validate input (recommended for security)
Returns:
bytes: decoded data
Raises:
binascii.Error: for invalid padding or characters (when validate=True)
"""
def b64decode_as_bytearray(s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False) -> bytearray:
"""
Decode Base64 encoded data, return as bytearray.
Parameters:
- s: string or bytes-like object to decode
- altchars: optional 2-character alternative alphabet
- validate: if True, strictly validate input
Returns:
bytearray: decoded data as mutable bytearray
Raises:
binascii.Error: for invalid padding or characters (when validate=True)
"""

Convenience functions for standard Base64 alphabet encoding/decoding, compatible with Python's base64 module.
def standard_b64encode(s: Buffer) -> bytes:
"""
Encode using standard Base64 alphabet (+/).
Parameters:
- s: bytes-like object to encode
Returns:
bytes: standard Base64 encoded data
"""
def standard_b64decode(s: str | Buffer) -> bytes:
"""
Decode standard Base64 encoded data.
Parameters:
- s: string or bytes-like object to decode
Returns:
bytes: decoded data
Raises:
binascii.Error: for invalid input
"""

URL and filesystem safe Base64 encoding/decoding using modified alphabet (-_ instead of +/) for web applications and file names.
def urlsafe_b64encode(s: Buffer) -> bytes:
"""
Encode using URL-safe Base64 alphabet (-_).
Parameters:
- s: bytes-like object to encode
Returns:
bytes: URL-safe Base64 encoded data
"""
def urlsafe_b64decode(s: str | Buffer) -> bytes:
"""
Decode URL-safe Base64 encoded data.
Parameters:
- s: string or bytes-like object to decode
Returns:
bytes: decoded data
Raises:
binascii.Error: for invalid input
"""

Version and license information functions for runtime introspection and compliance reporting.
def get_version() -> str:
"""
Get pybase64 version with optimization status.
Returns:
str: version string with C extension and SIMD status
e.g., "1.4.2 (C extension active - AVX2)"
"""
def get_license_text() -> str:
"""
Get complete license information.
Returns:
str: license text including libbase64 license information
"""

Internal functions for SIMD optimization control and introspection (available when C extension is active).
def _get_simd_flags_compile() -> int:
"""
Get compile-time SIMD flags used when building the C extension.
Returns:
int: bitmask of SIMD instruction sets available at compile time
"""
def _get_simd_flags_runtime() -> int:
"""
Get runtime SIMD flags detected on current CPU.
Returns:
int: bitmask of SIMD instruction sets available at runtime
"""
def _get_simd_name(flags: int) -> str:
"""
Get human-readable name for SIMD instruction set.
Parameters:
- flags: SIMD flags bitmask
Returns:
str: SIMD instruction set name (e.g., "AVX2", "fallback")
"""
def _get_simd_path() -> int:
"""
Get currently active SIMD path flags.
Returns:
int: active SIMD flags for current execution path
"""
def _set_simd_path(flags: int) -> None:
"""
Set SIMD path for optimization (advanced users only).
Parameters:
- flags: SIMD flags to activate
Note: Only available when C extension is active
"""

PyBase64 provides a comprehensive command-line tool for encoding, decoding, and benchmarking Base64 operations.
# Main command with version and help
pybase64 --version
pybase64 --license
pybase64 -h
# Encoding subcommand
pybase64 encode <input_file> [-o <output_file>] [-u|--url] [-a <altchars>]
# Decoding subcommand
pybase64 decode <input_file> [-o <output_file>] [-u|--url] [-a <altchars>] [--no-validation]
# Benchmarking subcommand
pybase64 benchmark <input_file> [-d <duration>]

The CLI can also be invoked using Python module syntax:
python -m pybase64 <subcommand> [arguments...]

Package version and exported symbols for version checking and introspection.
__version__: str # Package version string
__all__: tuple[str, ...] # Exported public API symbols

# Type alias for bytes-like objects (version-dependent import)
if sys.version_info < (3, 12):
from typing_extensions import Buffer
else:
from collections.abc import Buffer
# Protocol for decode functions
class Decode(Protocol):
__name__: str
__module__: str
def __call__(self, s: str | Buffer, altchars: str | Buffer | None = None, validate: bool = False) -> bytes: ...
# Protocol for encode functions
class Encode(Protocol):
__name__: str
__module__: str
def __call__(self, s: Buffer, altchars: Buffer | None = None) -> bytes: ...
# Protocol for encodebytes-style functions
class EncodeBytes(Protocol):
__name__: str
__module__: str
def __call__(self, s: Buffer) -> bytes: ...

import pybase64
# For maximum security and performance, use validate=True
# This enables optimized validation in the C extension
data = b'SGVsbG8sIFdvcmxkIQ=='
decoded = pybase64.b64decode(data, validate=True)

import pybase64
# Create data with custom alphabet for specific protocols
data = b'binary data here'
encoded = pybase64.b64encode(data, altchars=b'@&')
# Result uses @ and & instead of + and /
# Decode with same custom alphabet
decoded = pybase64.b64decode(encoded, altchars=b'@&')

import pybase64
# Encode with line breaks for email/MIME compatibility
large_data = b'x' * 200 # Large binary data
mime_encoded = pybase64.encodebytes(large_data)
# Result has newlines every 76 characters per RFC 2045

import pybase64
# Check if C extension and SIMD optimizations are active
version_info = pybase64.get_version()
print(version_info)
# Output examples:
# "1.4.2 (C extension active - AVX2)"
# "1.4.2 (C extension inactive)" # Fallback mode

# Encode a file using standard Base64
pybase64 encode input.txt -o encoded.txt
# Decode with validation (recommended for security)
pybase64 decode encoded.txt -o decoded.txt
# URL-safe encoding for web applications
pybase64 encode data.bin -u -o urlsafe.txt
# Custom alphabet encoding
pybase64 encode data.bin -a '@&' -o custom.txt
# Benchmark performance on your system
pybase64 benchmark test_data.bin
# Pipe operations (using stdin/stdout)
echo "Hello World" | pybase64 encode -
cat encoded.txt | pybase64 decode - > decoded.txt
# Check version and license
pybase64 --version
pybase64 --license
# Using Python module syntax
python -m pybase64 encode input.txt

All decoding functions may raise binascii.Error for:
- Invalid padding or characters (when `validate=True`)

Encoding functions may raise:
- `BufferError` for non-contiguous memory buffers
- `TypeError` for invalid input types
- `ValueError` for non-ASCII characters in custom alphabets

Performance tips:
- Use `validate=True` for security-critical applications - it's optimized in the C extension
- Use `b64decode` and `b64encode` directly rather than wrapper functions