# Keras Hub: pretrained models for Keras with multi-framework compatibility.
#
# Reusable neural network layers and components for building custom models or
# extending existing architectures. Keras Hub provides both general-purpose
# layers and specialized components for transformer models.
#
# --- Core transformer architecture components for building attention-based models. ---
class TransformerEncoder:
    """Transformer encoder block with multi-head attention and feed-forward network.

    Args:
        intermediate_dim: Hidden width of the feed-forward sublayer.
        num_heads: Number of attention heads.
        dropout: Dropout rate applied within the block. Defaults to 0.0.
        activation: Feed-forward activation function name. Defaults to "relu".
        layer_norm_epsilon: Epsilon for layer normalization. Defaults to 1e-05.
        kernel_initializer: Initializer for kernel weights.
        bias_initializer: Initializer for bias weights.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        intermediate_dim: int,
        num_heads: int,
        dropout: float = 0.0,
        activation: str = "relu",
        layer_norm_epsilon: float = 1e-05,
        kernel_initializer: str = "glorot_uniform",
        bias_initializer: str = "zeros",
        **kwargs,
    ): ...
class TransformerDecoder:
    """Transformer decoder block with masked multi-head attention.

    Args:
        intermediate_dim: Hidden width of the feed-forward sublayer.
        num_heads: Number of attention heads.
        dropout: Dropout rate applied within the block. Defaults to 0.0.
        activation: Feed-forward activation function name. Defaults to "relu".
        layer_norm_epsilon: Epsilon for layer normalization. Defaults to 1e-05.
        kernel_initializer: Initializer for kernel weights.
        bias_initializer: Initializer for bias weights.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        intermediate_dim: int,
        num_heads: int,
        dropout: float = 0.0,
        activation: str = "relu",
        layer_norm_epsilon: float = 1e-05,
        kernel_initializer: str = "glorot_uniform",
        bias_initializer: str = "zeros",
        **kwargs,
    ): ...
class CachedMultiHeadAttention:
"""Multi-head attention with key/value caching for efficient generation."""
def __init__(
self,
num_heads: int,
key_dim: int,
value_dim: int = None,
dropout: float = 0.0,
use_bias: bool = True,
kernel_initializer: str = "glorot_uniform",
bias_initializer: str = "zeros",
**kwargs
): ...Various embedding strategies for representing tokens and positions.
class PositionEmbedding:
    """Learnable position embeddings.

    Args:
        sequence_length: Maximum sequence length the embedding covers.
        initializer: Initializer for the embedding weights.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        sequence_length: int,
        initializer: str = "glorot_uniform",
        **kwargs,
    ): ...
class TokenAndPositionEmbedding:
    """Combined token and position embeddings.

    Args:
        vocabulary_size: Size of the token vocabulary.
        sequence_length: Maximum sequence length.
        embedding_dim: Output embedding dimension.
        mask_zero: Whether index 0 is treated as a padding/mask token.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        vocabulary_size: int,
        sequence_length: int,
        embedding_dim: int,
        mask_zero: bool = False,
        **kwargs,
    ): ...
class ReversibleEmbedding:
    """Reversible token embeddings that can map back from embeddings to tokens.

    Args:
        input_dim: Vocabulary size (number of embedding rows).
        output_dim: Embedding dimension.
        tie_weights: Whether the reverse projection shares the embedding
            weights. Defaults to True.
        embeddings_initializer: Initializer for the embedding weights.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        input_dim: int,
        output_dim: int,
        tie_weights: bool = True,
        embeddings_initializer: str = "uniform",
        **kwargs,
    ): ...
class RotaryEmbedding:
    """Rotary position embeddings (RoPE).

    Args:
        max_wavelength: Maximum wavelength of the rotary frequencies.
            Defaults to 10000.
        scaling_factor: Scale applied to positions. Defaults to 1.0.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        max_wavelength: int = 10000,
        scaling_factor: float = 1.0,
        **kwargs,
    ): ...
class SinePositionEncoding:
    """Sinusoidal position encoding as used in the original Transformer.

    Args:
        max_wavelength: Maximum wavelength of the sinusoids. Defaults to 10000.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        max_wavelength: int = 10000,
        **kwargs,
    ): ...
class AlibiBias:
    """Attention bias for ALiBi (Attention with Linear Biases) positional encoding.

    Args:
        num_heads: Number of attention heads the bias is generated for.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        num_heads: int,
        **kwargs,
    ): ...


# --- Normalization techniques used in modern transformer architectures. ---
class RMSNormalization:
    """Root Mean Square (RMS) normalization layer.

    Args:
        epsilon: Small constant added for numerical stability. Defaults to 1e-06.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        epsilon: float = 1e-06,
        **kwargs,
    ): ...


# --- Specialized output layers for different tasks. ---
class MaskedLMHead:
"""Output head for masked language modeling tasks."""
def __init__(
self,
vocabulary_size: int,
token_embedding: ReversibleEmbedding = None,
activation: str = None,
**kwargs
): ...Layers for data preprocessing and augmentation.
class ImageConverter:
"""Base class for image data conversion and preprocessing."""
def __init__(
self,
height: int,
width: int,
crop_to_aspect_ratio: bool = True,
interpolation: str = "bilinear",
data_format: str = None,
**kwargs
): ...
class AudioConverter:
    """Base class for audio data conversion and preprocessing.

    Args:
        sample_rate: Audio sample rate in Hz. Defaults to 16000.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        sample_rate: int = 16000,
        **kwargs,
    ): ...
class MaskedLMMaskGenerator:
"""Generate masks for masked language modeling tasks."""
def __init__(
self,
mask_selection_rate: float = 0.15,
mask_token_rate: float = 0.8,
random_token_rate: float = 0.1,
mask_token_id: int = None,
vocabulary_size: int = None,
**kwargs
): ...
class MultiSegmentPacker:
"""Pack multiple text segments with appropriate separators."""
def __init__(
self,
start_value: int,
end_value: int,
sep_value: int = None,
pad_value: int = 0,
sequence_length: int = None,
**kwargs
): ...
class StartEndPacker:
"""Pack text sequences with start and end tokens."""
def __init__(
self,
start_value: int,
end_value: int,
pad_value: int = 0,
sequence_length: int = None,
return_padding_mask: bool = False,
**kwargs
): ...
class RandomDeletion:
"""Randomly delete tokens from sequences for data augmentation."""
def __init__(
self,
rate: float = 0.1,
max_deletions: int = None,
skip_list: list = None,
seed: int = None,
**kwargs
): ...
class RandomSwap:
"""Randomly swap adjacent tokens for data augmentation."""
def __init__(
self,
rate: float = 0.1,
max_swaps: int = None,
skip_list: list = None,
seed: int = None,
**kwargs
): ...Specialized layers for object detection tasks.
class AnchorGenerator:
    """Generate anchor boxes for object detection.

    Args:
        bounding_box_format: Box format string (e.g. "xyxy").
        min_level: Lowest feature-pyramid level to generate anchors for.
        max_level: Highest feature-pyramid level to generate anchors for.
        num_scales: Number of anchor scales per level.
        aspect_ratios: Aspect ratios of the anchors.
        anchor_size: Base anchor size.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        bounding_box_format: str,
        min_level: int,
        max_level: int,
        num_scales: int,
        aspect_ratios: list,
        anchor_size: int,
        **kwargs,
    ): ...
class BoxMatcher:
    """Match ground truth boxes to anchor boxes.

    Args:
        thresholds: IoU thresholds that delimit the match bands.
        match_values: Match value assigned within each band.
        force_match_for_each_col: Whether every ground-truth box is forced
            to match at least one anchor. Defaults to False.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        thresholds: list,
        match_values: list,
        force_match_for_each_col: bool = False,
        **kwargs,
    ): ...
class NonMaxSuppression:
    """Non-maximum suppression for object detection post-processing.

    Args:
        bounding_box_format: Box format string (e.g. "xyxy").
        from_logits: Whether confidence inputs are raw logits. Defaults to False.
        iou_threshold: IoU above which overlapping boxes are suppressed.
            Defaults to 0.5.
        confidence_threshold: Minimum confidence to keep a detection.
            Defaults to 0.05.
        max_detections: Maximum detections kept overall. Defaults to 100.
        max_detections_per_class: Maximum detections kept per class.
            Defaults to 100.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        bounding_box_format: str,
        from_logits: bool = False,
        iou_threshold: float = 0.5,
        confidence_threshold: float = 0.05,
        max_detections: int = 100,
        max_detections_per_class: int = 100,
        **kwargs,
    ): ...


# --- Components specific to certain model architectures. ---
class FNetEncoder:
    """F-Net encoder using Fourier transforms instead of attention.

    Args:
        intermediate_dim: Hidden width of the feed-forward sublayer.
        activation: Feed-forward activation function name. Defaults to "relu".
        dropout: Dropout rate. Defaults to 0.0.
        layer_norm_epsilon: Epsilon for layer normalization. Defaults to 1e-05.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        intermediate_dim: int,
        activation: str = "relu",
        dropout: float = 0.0,
        layer_norm_epsilon: float = 1e-05,
        **kwargs,
    ): ...
class SAMMaskDecoder:
    """Mask decoder for Segment Anything Model.

    Args:
        num_multimask_outputs: Number of candidate masks produced. Defaults to 3.
        iou_head_depth: Depth of the IoU prediction head. Defaults to 3.
        iou_head_hidden_dim: Hidden width of the IoU prediction head.
            Defaults to 256.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        num_multimask_outputs: int = 3,
        iou_head_depth: int = 3,
        iou_head_hidden_dim: int = 256,
        **kwargs,
    ): ...
class SAMPromptEncoder:
    """Prompt encoder for Segment Anything Model.

    Args:
        embed_dim: Embedding dimension. Defaults to 256.
        image_embedding_size: Spatial size of the image embedding.
            Defaults to (64, 64).
        input_image_size: Size of the input image. Defaults to (1024, 1024).
        mask_in_chans: Channels of the mask input branch. Defaults to 16.
        **kwargs: Additional keyword arguments.
    """

    def __init__(
        self,
        embed_dim: int = 256,
        image_embedding_size: tuple = (64, 64),
        input_image_size: tuple = (1024, 1024),
        mask_in_chans: int = 16,
        **kwargs,
    ): ...


# --- Usage examples ---
import keras_hub
import keras


# Example: create a custom model using Keras Hub layers.
def create_custom_transformer(
    vocab_size: int,
    sequence_length: int,
    num_layers: int = 6,
    hidden_dim: int = 512,
    num_heads: int = 8,
    intermediate_dim: int = 2048,
):
    """Build a small encoder-only text classifier from Keras Hub layers.

    Args:
        vocab_size: Token vocabulary size.
        sequence_length: Fixed input sequence length.
        num_layers: Number of stacked encoder blocks. Defaults to 6.
        hidden_dim: Embedding/model dimension. Defaults to 512.
        num_heads: Attention heads per encoder block. Defaults to 8.
        intermediate_dim: Feed-forward hidden width. Defaults to 2048.

    Returns:
        A compiled-ready ``keras.Model`` mapping token IDs to class probabilities.
    """
    # Input
    token_ids = keras.Input(shape=(sequence_length,), dtype="int32")

    # Token and position embeddings
    embeddings = keras_hub.layers.TokenAndPositionEmbedding(
        vocabulary_size=vocab_size,
        sequence_length=sequence_length,
        embedding_dim=hidden_dim,
    )(token_ids)

    # Stack transformer encoder layers
    x = embeddings
    for _ in range(num_layers):
        x = keras_hub.layers.TransformerEncoder(
            intermediate_dim=intermediate_dim,
            num_heads=num_heads,
            dropout=0.1,
        )(x)

    # Output layer for classification; use the first ([CLS]) token position.
    outputs = keras.layers.Dense(2, activation="softmax")(x[:, 0, :])
    return keras.Model(token_ids, outputs)


# Create and compile model
model = create_custom_transformer(vocab_size=10000, sequence_length=128)
model.compile(optimizer="adam", loss="sparse_categorical_crossentropy")

import keras_hub
# Example: create a mask generator for MLM training.
mask_generator = keras_hub.layers.MaskedLMMaskGenerator(
    mask_selection_rate=0.15,
    mask_token_rate=0.8,
    random_token_rate=0.1,
    mask_token_id=103,  # [MASK] token ID
    vocabulary_size=30000,
)

# Apply masking to token sequences.
tokens = [[101, 2054, 2003, 2115, 2171, 102]]  # Example tokenized text
masked_tokens, mask_positions, mask_ids = mask_generator(tokens)
print("Masked tokens:", masked_tokens)
print("Mask positions:", mask_positions)

import keras_hub
import keras


# Example: use rotary embeddings in custom attention.
class CustomAttentionWithRoPE(keras.layers.Layer):
    """Self-attention layer that applies rotary position embeddings first."""

    def __init__(self, num_heads, key_dim, **kwargs):
        super().__init__(**kwargs)
        self.num_heads = num_heads
        self.key_dim = key_dim
        self.rotary_embedding = keras_hub.layers.RotaryEmbedding()
        self.attention = keras.layers.MultiHeadAttention(
            num_heads=num_heads,
            key_dim=key_dim,
        )

    def call(self, inputs):
        # Apply rotary embeddings, then self-attend over the rotated inputs.
        rotary_embedded = self.rotary_embedding(inputs)
        return self.attention(rotary_embedded, rotary_embedded)


# Use in model
attention_layer = CustomAttentionWithRoPE(num_heads=8, key_dim=64)

import keras_hub
# Example: create a token augmentation pipeline.
random_deletion = keras_hub.layers.RandomDeletion(rate=0.1)
random_swap = keras_hub.layers.RandomSwap(rate=0.05)

# Apply augmentations sequentially.
original_tokens = [[1, 2, 3, 4, 5, 6, 7, 8]]
augmented_tokens = random_deletion(original_tokens)
augmented_tokens = random_swap(augmented_tokens)
print("Original:", original_tokens[0])
print("Augmented:", augmented_tokens[0])

import keras_hub
# Example: pack multiple segments with separators.
packer = keras_hub.layers.MultiSegmentPacker(
    start_value=101,  # [CLS]
    end_value=102,  # [SEP]
    pad_value=0,  # [PAD]
    sequence_length=128,
)

# Pack two text segments.
segment1 = [2054, 2003, 2115, 2171]  # "what is your name"
segment2 = [2026, 2572, 2017, 2009]  # "my name is"
packed = packer([segment1, segment2])
print("Packed sequence:", packed)

import keras_hub
# Example: create an anchor generator for object detection.
anchor_generator = keras_hub.layers.AnchorGenerator(
    bounding_box_format="xyxy",
    min_level=3,
    max_level=7,
    num_scales=3,
    aspect_ratios=[0.5, 1.0, 2.0],
    anchor_size=4,
)

# Generate anchors for feature maps.
feature_map_shapes = [(32, 32), (16, 16), (8, 8)]
anchors = anchor_generator(feature_map_shapes)
print("Generated anchors shape:", anchors.shape)

# Apply non-max suppression to detections.
nms = keras_hub.layers.NonMaxSuppression(
    bounding_box_format="xyxy",
    iou_threshold=0.5,
    confidence_threshold=0.05,
    max_detections=100,
)

import keras_hub
import keras


# Example: use RMS normalization in a custom layer.
class CustomLayer(keras.layers.Layer):
    """Dense projection followed by RMS normalization."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.dense = keras.layers.Dense(512)
        self.rms_norm = keras_hub.layers.RMSNormalization()

    def call(self, inputs):
        x = self.dense(inputs)
        return self.rms_norm(x)


# Use in model
layer = CustomLayer()

# Install with Tessl CLI:
# npx tessl i tessl/pypi-keras-hub