Efficient arrays of booleans with comprehensive sequence operations, bitwise operations, and specialized functionality for encoding/decoding variable-length prefix codes.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Specialized functionality for encoding/decoding variable-length prefix codes, Huffman coding, compression algorithms, and advanced bit manipulation techniques.
Bitarray provides efficient support for encoding and decoding variable-length prefix codes, commonly used in compression algorithms and data transmission protocols.
def encode(self, code: dict, iterable) -> None:
"""
Encode objects using variable-length prefix codes and append to bitarray.
Args:
code: Dictionary mapping objects to bitarray codes
iterable: Sequence of objects to encode
"""
def decode(self, code: Union[dict, decodetree]) -> Iterator:
"""
Decode bitarray using variable-length prefix codes.
Args:
code: Dictionary or decodetree for decoding
Yields:
Decoded objects from the bitarray
"""

Usage Examples:
from bitarray import bitarray
# Create encoding dictionary
code = {
'A': bitarray('00'),
'B': bitarray('01'),
'C': bitarray('10'),
'D': bitarray('11')
}
# Encoding
a = bitarray()
a.encode(code, 'ABCD') # Encodes to '00011011'
a.encode(code, ['A', 'C']) # Append 'AC' -> '0010'
# Decoding
decoded = list(a.decode(code)) # ['A', 'B', 'C', 'D', 'A', 'C']
# More complex example with text
text_code = {
' ': bitarray('00'),
'e': bitarray('010'),
't': bitarray('011'),
'a': bitarray('100'),
'o': bitarray('101'),
'i': bitarray('110'),
'n': bitarray('1110'),
's': bitarray('1111')
}
message = bitarray()
message.encode(text_code, "tea")
decoded_text = ''.join(message.decode(text_code)) # "tea"

The decodetree class provides an optimized structure for decoding variable-length prefix codes, offering better performance than dictionary-based decoding for large code tables.
class decodetree:
"""Optimized tree structure for decoding variable-length prefix codes"""
def __init__(self, code: dict) -> None:
"""
Create decode tree from code dictionary.
Args:
code: Dictionary mapping objects to bitarray codes
"""
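def complete(self) -> bool:
"""
Check whether the decode tree is complete.
A tree is complete when every internal node has both children, i.e. no further codeword could be added. This is a stronger property than being prefix-free.
Returns:
True if tree represents a complete prefix code
"""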
def nodes(self) -> int:
"""
Get the number of nodes in the tree (internal and leaf nodes).
Returns:
Total number of nodes
"""
def todict(self) -> dict:
"""
Convert decode tree back to dictionary format.
Returns:
Dictionary mapping objects (symbols) to bitarray codes
"""

Usage Examples:
from bitarray import bitarray, decodetree
# Create code dictionary
code = {
'frequent': bitarray('0'),
'common': bitarray('10'),
'rare': bitarray('110'),
'very_rare': bitarray('111')
}
# Create optimized decode tree
tree = decodetree(code)
# Tree analysis
print(tree.complete()) # True (complete prefix code)
print(tree.nodes()) # 7 (counts internal and leaf nodes)
# Efficient decoding with tree
message = bitarray('0101110')
decoded = list(message.decode(tree)) # ['frequent', 'common', 'very_rare', 'frequent']
# Convert back to dictionary if needed
code_dict = tree.todict() # Symbol -> bitarray mapping, equivalent to the original code dict

Functions for generating and using Huffman codes, which provide optimal variable-length encoding for known symbol frequencies.
def huffman_code(freq_map: Union[dict, Counter], endian: Optional[str] = None) -> dict:
"""
Generate Huffman codes from frequency map.
Args:
freq_map: Dictionary or Counter mapping symbols to frequencies
endian: Bit-endianness for generated codes
Returns:
Dictionary mapping symbols to bitarray codes
"""
def canonical_huffman(freq_map: Union[dict, Counter]) -> tuple[dict, list, list]:
"""
Generate canonical Huffman codes.
Args:
freq_map: Dictionary or Counter mapping symbols to frequencies
Returns:
Tuple of (code_dict, count_list, symbol_list) for canonical encoding
"""
def canonical_decode(a: bitarray, count: list[int], symbol: list) -> Iterator:
"""
Decode using canonical Huffman codes.
Args:
a: Bitarray to decode
count: List of code counts by length (from canonical_huffman)
symbol: List of symbols in canonical order (from canonical_huffman)
Yields:
Decoded symbols
"""

Usage Examples:
from bitarray import bitarray
from bitarray.util import huffman_code, canonical_huffman, canonical_decode
from collections import Counter
# Character frequencies in English text
frequencies = {
'e': 127, 't': 90, 'a': 82, 'o': 75, 'i': 70, 'n': 67,
's': 63, 'h': 61, 'r': 60, 'd': 43, 'l': 40, 'c': 28,
'u': 28, 'm': 24, 'w': 23, 'f': 22, 'g': 20, 'y': 20,
'p': 19, 'b': 13, 'v': 10, 'k': 8, 'j': 2, 'x': 2,
'q': 1, 'z': 1
}
# Generate Huffman codes
code = huffman_code(frequencies)
# Most frequent characters get shorter codes
print(f"'e': {code['e'].to01()}") # Short code for 'e'
print(f"'z': {code['z'].to01()}") # Longer code for 'z'
# Encode text
text = "hello world"
encoded = bitarray()
encoded.encode(code, text)
# Decode back
decoded_text = ''.join(encoded.decode(code))
print(decoded_text == text) # True
# Canonical Huffman (standardized format)
canon_code, count, symbol = canonical_huffman(frequencies)
encoded_canon = bitarray()
encoded_canon.encode(canon_code, text)
# Canonical decoding
decoded_canon = list(canonical_decode(encoded_canon, count, symbol))
print(''.join(decoded_canon) == text) # True

Additional encoding and compression techniques for specialized use cases.
# From utility module - already covered in detail in utility-functions.md
def sc_encode(a: bitarray) -> bytes:
"""Sparse compression - optimal for arrays with few set bits"""
def sc_decode(stream: Iterable[int]) -> bitarray:
"""Decode sparse-compressed data"""
def vl_encode(a: bitarray) -> bytes:
"""Variable-length encoding for general compression"""
def vl_decode(stream: Iterable[int], endian: Optional[str] = None) -> bitarray:
"""Variable-length decoding"""

Here are complete examples showing how these advanced features work together for real-world applications:
Text Compression Example:
from bitarray import bitarray
from bitarray.util import huffman_code
from collections import Counter
def compress_text(text: str) -> tuple[bitarray, dict]:
    """Compress text using Huffman coding.

    Args:
        text: The string to compress. May be empty.

    Returns:
        A ``(compressed, code)`` pair: the encoded bits and the Huffman
        code (symbol -> bitarray) needed to decode them. For empty input
        both are empty.
    """
    if not text:
        # huffman_code() cannot build a code from an empty frequency map,
        # and there is nothing to encode anyway.
        return bitarray(), {}
    # Analyze character frequencies
    frequencies = Counter(text)
    # Generate optimal codes
    code = huffman_code(frequencies)
    # Encode text
    compressed = bitarray()
    compressed.encode(code, text)
    return compressed, code


def decompress_text(compressed: bitarray, code: dict) -> str:
    """Decompress Huffman-coded text.

    Args:
        compressed: Bits produced by ``compress_text``.
        code: The Huffman code returned alongside ``compressed``.

    Returns:
        The original text; the empty string when ``compressed`` is empty.
    """
    if len(compressed) == 0:
        # An empty bitarray decodes to the empty string. Returning early
        # also avoids handing decode() the empty code table that
        # compress_text() produces for empty input.
        return ''
    return ''.join(compressed.decode(code))
# Example usage
original_text = "this is a test message for compression"
compressed_bits, encoding = compress_text(original_text)
decompressed_text = decompress_text(compressed_bits, encoding)
# Compute the uncompressed size once (8 bits per ASCII char) instead of
# building an 8x copy of the string just to measure its length.
original_bits = len(original_text) * 8
print(f"Original: {original_bits} bits")
print(f"Compressed: {len(compressed_bits)} bits")
print(f"Compression ratio: {len(compressed_bits) / original_bits:.2f}")
print(f"Match: {original_text == decompressed_text}")

Network Protocol Example:
from bitarray import bitarray, decodetree
# Single source of truth for the protocol's code table. The encoder and the
# decoder both read it, so the two can never drift apart.
_PROTOCOL_CODES = {
    'START': bitarray('000'),
    'DATA': bitarray('001'),
    'ACK': bitarray('010'),
    'NACK': bitarray('011'),
    'END': bitarray('100'),
    'ERROR': bitarray('101'),
}


def create_protocol_decoder() -> decodetree:
    """Create decoder for a hypothetical network protocol.

    Returns:
        A decodetree built from the protocol's code table.
    """
    # Create optimized decoder tree
    return decodetree(_PROTOCOL_CODES)


def encode_message(commands: list[str]) -> bitarray:
    """Encode protocol message.

    Args:
        commands: Protocol command names; each must be a key of the
            protocol's code table.

    Returns:
        A new bitarray holding the concatenated codes of ``commands``.
    """
    message = bitarray()
    message.encode(_PROTOCOL_CODES, commands)
    return message


def decode_message(message: bitarray, decoder: decodetree) -> list[str]:
    """Decode protocol message.

    Args:
        message: Bits produced by ``encode_message``.
        decoder: Decode tree from ``create_protocol_decoder``.

    Returns:
        The decoded command names, in order.
    """
    return list(message.decode(decoder))
# Example protocol usage: round-trip a command sequence through the codec.
commands = ['START', 'DATA', 'DATA', 'ACK', 'END']
decoder = create_protocol_decoder()

# Encode the commands, then decode the resulting bits again.
encoded = encode_message(commands)
decoded = decode_message(encoded, decoder)

# Show the input, its bit representation and the decoded result.
print(f"Commands: {commands}")
print(f"Encoded: {encoded.to01()}")
print(f"Decoded: {decoded}")
print(f"Match: {commands == decoded}")

Data Stream Processing:
from bitarray import bitarray, decodetree
from bitarray.util import serialize, deserialize
class BitStreamProcessor:
    """Process streams of bit data with encoding/decoding.

    Incoming bits accumulate in ``buffer``; ``process_chunks`` consumes
    them ``chunk_size`` bits at a time.
    """

    def __init__(self, chunk_size: int = 1024):
        """Create a processor with an empty buffer.

        Args:
            chunk_size: Number of bits decoded per chunk; must be positive.

        Raises:
            ValueError: If ``chunk_size`` is zero or negative.
        """
        if chunk_size <= 0:
            # A non-positive size would make process_chunks() loop forever:
            # the buffer length could never drop below the threshold.
            raise ValueError(f"chunk_size must be positive, got {chunk_size}")
        self.chunk_size = chunk_size
        self.buffer = bitarray()

    def add_data(self, data: bitarray) -> None:
        """Add data to processing buffer."""
        self.buffer.extend(data)

    def process_chunks(self, decoder: decodetree) -> list:
        """Process complete chunks from buffer.

        Every full ``chunk_size``-bit chunk is removed from the front of
        the buffer and decoded on its own; a shorter remainder stays
        buffered for a later call.

        Because chunks are decoded independently, chunk boundaries must
        coincide with codeword boundaries (e.g. ``chunk_size`` a multiple
        of the code length for a fixed-length code).
        NOTE(review): decode() is expected to raise ValueError for a chunk
        that ends in the middle of a codeword - confirm against the
        bitarray version in use.

        Args:
            decoder: Decode tree used to decode each chunk.

        Returns:
            The decoded objects of all processed chunks, in order.
        """
        results = []
        while len(self.buffer) >= self.chunk_size:
            # Extract chunk
            chunk = self.buffer[:self.chunk_size]
            self.buffer = self.buffer[self.chunk_size:]
            # Process chunk
            results.extend(chunk.decode(decoder))
        return results

    def save_state(self) -> bytes:
        """Serialize current buffer state."""
        return serialize(self.buffer)

    def restore_state(self, state: bytes) -> None:
        """Restore buffer from serialized state."""
        self.buffer = deserialize(state)
# Example usage
# create_protocol_decoder() and encode_message() come from the Network
# Protocol example. Every protocol code is 3 bits long, so the chunk size
# must be a multiple of 3: a 32-bit chunk would end in the middle of a
# codeword and could not be decoded on its own.
processor = BitStreamProcessor(chunk_size=30)
decoder = create_protocol_decoder()
# Simulate streaming data (20 commands -> 60 bits -> two full chunks)
stream_data = encode_message(['START', 'DATA'] * 10)
processor.add_data(stream_data)
# Process available chunks
results = processor.process_chunks(decoder)
print(f"Processed {len(results)} commands")
# Save and restore state
state = processor.save_state()
processor.restore_state(state)

Advanced features are optimized for different use cases:
Choose the appropriate technique based on your data characteristics and performance requirements.
Install with Tessl CLI
npx tessl i tessl/pypi-bitarray