tessl/pypi-bitarray

Efficient arrays of booleans with comprehensive sequence operations, bitwise operations, and specialized functionality for encoding/decoding variable-length prefix codes.

—

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview

Eval results

Files

Advanced Features

Name: tessl/pypi-bitarray
Author: tessl

Specialized functionality for encoding/decoding variable-length prefix codes, Huffman coding, compression algorithms, and advanced bit manipulation techniques.

Capabilities

Variable-Length Prefix Coding

Bitarray provides efficient support for encoding and decoding variable-length prefix codes, commonly used in compression algorithms and data transmission protocols.

def encode(self, code: dict, iterable) -> None:
    """
    Encode symbols with a variable-length prefix code and append the result.

    The bitarray is extended in place; nothing is returned.

    Args:
        code: Dictionary mapping each symbol to its bitarray codeword
        iterable: Symbols to encode, in order (e.g. a string or a list)
    """

def decode(self, code: Union[dict, decodetree]) -> Iterator:
    """
    Decode this bitarray's contents using a variable-length prefix code.

    Args:
        code: Prefix code, given either as a dictionary mapping symbols to
            bitarray codewords or as a decodetree built from such a dictionary

    Yields:
        Decoded symbols, in the order their codewords appear in the bitarray
    """

Usage Examples:

from bitarray import bitarray

# Create encoding dictionary: a fixed-length (2-bit) code for four symbols
code = {
    'A': bitarray('00'),
    'B': bitarray('01'), 
    'C': bitarray('10'),
    'D': bitarray('11')
}

# Encoding: each encode() call appends to the same bitarray
a = bitarray()
a.encode(code, 'ABCD')       # a is now '00011011'
a.encode(code, ['A', 'C'])   # appends '0010' -> a is now '000110110010'

# Decoding: decode() yields symbols; list() collects them
decoded = list(a.decode(code))  # ['A', 'B', 'C', 'D', 'A', 'C']

# More complex example with text: codewords of different lengths (2-4 bits),
# none of which is a prefix of another
text_code = {
    ' ': bitarray('00'),
    'e': bitarray('010'),
    't': bitarray('011'),
    'a': bitarray('100'),
    'o': bitarray('101'),
    'i': bitarray('110'),
    'n': bitarray('1110'),
    's': bitarray('1111')
}

message = bitarray()
message.encode(text_code, "tea")  # 't' 'e' 'a' -> '011' '010' '100'
decoded_text = ''.join(message.decode(text_code))  # "tea"

Decode Trees

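The decodetree class provides a reusable structure for decoding variable-length prefix codes. When a plain dictionary is passed to decode(), the decoding tree has to be built from it on every call; a decodetree is built once and can be passed to any number of decode() calls, which pays off when the same code is used to decode many bitarrays.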

class decodetree:
    """Binary tree built from a prefix code, to be passed to bitarray.decode()"""

    def __init__(self, code: dict) -> None:
        """
        Create decode tree from code dictionary.

        Args:
            code: Dictionary mapping symbols to bitarray codes
        """

    def complete(self) -> bool:
        """
        Check whether the decode tree is complete.

        A tree is complete when every node either has both children or is a
        symbol (leaf) node, so that every bit sequence leads to a symbol.
        Completeness is a separate property from being prefix-free: a valid
        prefix code that leaves some bit patterns unused is not complete.

        Returns:
            True if the tree is complete
        """

    def nodes(self) -> int:
        """
        Get number of nodes in tree.

        Returns:
            Total number of nodes, internal and external (leaf) nodes
        """

    def todict(self) -> dict:
        """
        Reconstruct the code dictionary from the tree.

        Returns:
            Dictionary mapping symbols to bitarray codes, i.e. the same
            orientation as the dictionary passed to the constructor
        """

Usage Examples:

from bitarray import bitarray, decodetree

# Create code dictionary (shorter codewords for more frequent symbols)
code = {
    'frequent': bitarray('0'),
    'common': bitarray('10'),
    'rare': bitarray('110'),
    'very_rare': bitarray('111')
}

# Build the decode tree once; it can be reused for many decode() calls
tree = decodetree(code)

# Tree analysis
print(tree.complete())    # True (every bit sequence leads to a symbol)
print(tree.nodes())       # Number of nodes in the tree (internal and leaf)

# Efficient decoding with tree: '0' '10' '111' '0'
message = bitarray('0101110')
decoded = list(message.decode(tree))  # ['frequent', 'common', 'very_rare', 'frequent']

# Convert back to dictionary if needed
code_dict = tree.todict()  # {symbol: bitarray} mapping rebuilt from the tree

Huffman Coding

Functions for generating and using Huffman codes, which give the shortest possible average code length among prefix codes for a known set of symbol frequencies. These functions are provided by the bitarray.util module.

def huffman_code(freq_map: Union[dict, Counter], endian: Optional[str] = None) -> dict:
    """
    Generate Huffman codes from frequency map.

    The returned dictionary has the form expected by bitarray.encode() and
    bitarray.decode().

    Args:
        freq_map: Dictionary or Counter mapping symbols to frequencies
        endian: Bit-endianness for generated codes (None leaves the choice
            to the library)

    Returns:
        Dictionary mapping symbols to bitarray codes
    """

def canonical_huffman(freq_map: Union[dict, Counter]) -> tuple[dict, list, list]:
    """
    Generate canonical Huffman codes.

    Args:
        freq_map: Dictionary or Counter mapping symbols to frequencies

    Returns:
        Tuple of (code_dict, count_list, symbol_list) for canonical encoding:
        code_dict maps symbols to bitarray codes (usable with
        bitarray.encode()), while count_list and symbol_list are the
        `count` and `symbol` arguments expected by canonical_decode()
    """

def canonical_decode(a: bitarray, count: list[int], symbol: list) -> Iterator:
    """
    Decode using canonical Huffman codes.

    Only the two lists returned by canonical_huffman() are needed here; the
    code dictionary itself is not passed.

    Args:
        a: Bitarray to decode
        count: List of code counts by length (from canonical_huffman)
        symbol: List of symbols in canonical order (from canonical_huffman)

    Yields:
        Decoded symbols
    """

Usage Examples:

from bitarray import bitarray
from bitarray.util import huffman_code, canonical_huffman, canonical_decode
from collections import Counter

# Character frequencies in English text.
# Every symbol that will be encoded must appear here: encode() raises
# ValueError for a symbol that has no codeword. The space entry is an
# approximate weight so that text containing spaces can be encoded.
frequencies = {
    ' ': 200,
    'e': 127, 't': 90, 'a': 82, 'o': 75, 'i': 70, 'n': 67,
    's': 63, 'h': 61, 'r': 60, 'd': 43, 'l': 40, 'c': 28,
    'u': 28, 'm': 24, 'w': 23, 'f': 22, 'g': 20, 'y': 20,
    'p': 19, 'b': 13, 'v': 10, 'k': 8, 'j': 2, 'x': 2,
    'q': 1, 'z': 1
}

# Generate Huffman codes
code = huffman_code(frequencies)

# Most frequent characters get shorter codes
print(f"'e': {code['e'].to01()}")  # Short code for 'e'
print(f"'z': {code['z'].to01()}")  # Longer code for 'z'

# Encode text (all of its characters, including the space, are in `code`)
text = "hello world"
encoded = bitarray()
encoded.encode(code, text)

# Decode back
decoded_text = ''.join(encoded.decode(code))
print(decoded_text == text)  # True

# Canonical Huffman (standardized format)
canon_code, count, symbol = canonical_huffman(frequencies)
encoded_canon = bitarray()
encoded_canon.encode(canon_code, text)

# Canonical decoding needs only `count` and `symbol`, not the code dict
decoded_canon = list(canonical_decode(encoded_canon, count, symbol))
print(''.join(decoded_canon) == text)  # True

Advanced Encoding Techniques

Additional encoding and compression techniques for specialized use cases.

# From utility module (bitarray.util) - covered in detail in utility-functions.md
def sc_encode(a: bitarray) -> bytes:
    """Compress a sparse bitarray into bytes - intended for arrays with few set bits"""

def sc_decode(stream: Iterable[int]) -> bitarray:
    """Decode sparse-compressed data (an iterable of byte values) back into a bitarray"""

def vl_encode(a: bitarray) -> bytes:
    """Serialize a bitarray to a variable-length byte representation.

    This is a compact storage format (useful for writing small bitarrays
    into a binary stream), not a compression scheme: the output size grows
    with the length of the bitarray regardless of its content.
    """

def vl_decode(stream: Iterable[int], endian: Optional[str] = None) -> bitarray:
    """Decode a variable-length byte representation (an iterable of byte values) into a bitarray"""

Practical Applications

Here are complete examples showing how these advanced features work together for real-world applications:

Text Compression Example:

from bitarray import bitarray
from bitarray.util import huffman_code
from collections import Counter

def compress_text(text: str) -> tuple[bitarray, dict]:
    """Huffman-encode `text`.

    Returns:
        A pair (bits, code): the encoded bitarray and the symbol -> bitarray
        code table built from the character counts of `text`. The table is
        needed again to decode the bits.
    """
    # The code is derived from how often each character occurs in the input
    code = huffman_code(Counter(text))

    bits = bitarray()
    bits.encode(code, text)
    return bits, code

def decompress_text(compressed: bitarray, code: dict) -> str:
    """Rebuild the original string from Huffman-coded bits and their code table"""
    symbols = compressed.decode(code)
    return ''.join(symbols)

# Example usage
original_text = "this is a test message for compression"
compressed_bits, encoding = compress_text(original_text)
decompressed_text = decompress_text(compressed_bits, encoding)

# Uncompressed size assumes 8 bits per ASCII character. Multiply the length
# by 8 rather than measuring an 8x repeated copy of the string.
original_bits = len(original_text) * 8

print(f"Original: {original_bits} bits")  # 8 bits per ASCII char
print(f"Compressed: {len(compressed_bits)} bits")
print(f"Compression ratio: {len(compressed_bits) / original_bits:.2f}")
print(f"Match: {original_text == decompressed_text}")

Network Protocol Example:

from bitarray import bitarray, decodetree

# Single source of truth for the protocol's code table, shared by the encoder
# and the decoder so the two can never drift apart.
# Fixed-length 3-bit codewords; the patterns '110' and '111' are unused.
_PROTOCOL_CODES = {
    'START': bitarray('000'),
    'DATA': bitarray('001'),
    'ACK': bitarray('010'),
    'NACK': bitarray('011'),
    'END': bitarray('100'),
    'ERROR': bitarray('101'),
}


def create_protocol_decoder():
    """Create decoder for a hypothetical network protocol.

    Returns:
        A decodetree built from the protocol code table, suitable for
        passing to bitarray.decode()
    """
    return decodetree(_PROTOCOL_CODES)


def encode_message(commands: list[str]) -> bitarray:
    """Encode protocol message.

    Args:
        commands: Command names to encode, in order; each must be a key of
            the protocol code table

    Returns:
        A new bitarray holding the concatenated codewords
    """
    message = bitarray()
    message.encode(_PROTOCOL_CODES, commands)
    return message

def decode_message(message: bitarray, decoder: decodetree) -> list[str]:
    """Turn an encoded protocol message back into its list of command names"""
    commands = message.decode(decoder)
    return list(commands)

# Example protocol usage: encode a command sequence, then decode it again
decoder = create_protocol_decoder()
commands = ['START', 'DATA', 'DATA', 'ACK', 'END']
encoded = encode_message(commands)
decoded = decode_message(encoded, decoder)

print(f"Commands: {commands}")
print(f"Encoded: {encoded.to01()}")     # 000001001010100 (5 commands x 3 bits)
print(f"Decoded: {decoded}")
print(f"Match: {commands == decoded}")  # True

Data Stream Processing:

from bitarray import bitarray
from bitarray.util import serialize, deserialize

class BitStreamProcessor:
    """Process streams of bit data with encoding/decoding.

    Incoming bits are accumulated in a buffer and decoded in windows of
    `chunk_size` bits. A window boundary may fall in the middle of a
    codeword; the bits of such a partial codeword are kept in the buffer and
    decoded once the rest of the codeword has arrived.
    """

    def __init__(self, chunk_size: int = 1024):
        """
        Args:
            chunk_size: Number of bits examined per decoding step

        Raises:
            ValueError: If chunk_size is not positive (a non-positive size
                would make process_chunks() loop forever)
        """
        if chunk_size <= 0:
            raise ValueError("chunk_size must be a positive integer")
        self.chunk_size = chunk_size
        self.buffer = bitarray()

    def add_data(self, data: "bitarray") -> None:
        """Add data to processing buffer"""
        self.buffer.extend(data)

    def process_chunks(self, decoder: "decodetree") -> list:
        """Decode as many whole codewords as the buffered chunks contain.

        Args:
            decoder: decodetree (or plain code dictionary) describing the
                prefix code used in the stream

        Returns:
            Decoded symbols, in stream order. Bits that were not decoded
            (fewer than chunk_size buffered, or a trailing partial codeword)
            stay in the buffer for a later call.

        Raises:
            ValueError: If the buffered bits cannot be decoded with the
                given code (as opposed to merely ending mid-codeword)
        """
        results = []

        # Codeword length per symbol, so the number of bits consumed can be
        # tracked while iterating over the decoded symbols.
        code = decoder if isinstance(decoder, dict) else decoder.todict()
        code_lengths = {symbol: len(bits) for symbol, bits in code.items()}
        longest = max(code_lengths.values())

        while len(self.buffer) >= self.chunk_size:
            chunk = self.buffer[:self.chunk_size]
            consumed = 0
            try:
                # decode() yields lazily, so every symbol seen before an
                # error corresponds to a fully decoded codeword.
                for symbol in chunk.decode(decoder):
                    results.append(symbol)
                    consumed += code_lengths[symbol]
            except ValueError:
                # A short undecodable tail is a codeword cut off by the chunk
                # boundary and is left in the buffer. A tail at least as long
                # as the longest codeword cannot be explained that way: the
                # data really is invalid, so the error is propagated.
                if len(chunk) - consumed >= longest:
                    raise

            if consumed == 0:
                # chunk_size is smaller than the pending codeword; wait for
                # more data instead of spinning on the same bits.
                break

            # Drop only the bits that were decoded, not the whole chunk
            self.buffer = self.buffer[consumed:]

        return results

    def save_state(self) -> bytes:
        """Serialize current buffer state"""
        return serialize(self.buffer)

    def restore_state(self, state: bytes) -> None:
        """Restore buffer from serialized state"""
        self.buffer = deserialize(state)

# Example usage (reuses create_protocol_decoder() and encode_message() from
# the network protocol example above)
# chunk_size is a multiple of the 3-bit codeword length, so a chunk boundary
# never falls inside a codeword.
processor = BitStreamProcessor(chunk_size=30)
decoder = create_protocol_decoder()

# Simulate streaming data: 20 commands x 3 bits = 60 bits
stream_data = encode_message(['START', 'DATA'] * 10)
processor.add_data(stream_data)

# Process available chunks (two full 30-bit chunks)
results = processor.process_chunks(decoder)
print(f"Processed {len(results)} commands")  # Processed 20 commands

# Save and restore state
state = processor.save_state()
processor.restore_state(state)