State-of-the-art Parameter-Efficient Fine-Tuning (PEFT) methods for efficiently adapting large pretrained models.

Low-Rank Adaptation (LoRA) and related methods decompose weight updates into low-rank matrices, enabling efficient fine-tuning with minimal parameter overhead. This includes standard LoRA, adaptive variants, and structural improvements. By factoring each weight update into two smaller matrices, LoRA dramatically reduces the number of trainable parameters.
@dataclass
class LoraConfig(PeftConfig):
    """Configuration for LoRA (Low-Rank Adaptation)."""

    r: int = 8  # LoRA attention dimension (rank of the update matrices)
    lora_alpha: int = 8  # scaling parameter applied to the LoRA update
    target_modules: Optional[Union[List[str], str]] = None  # names of modules to apply LoRA to
    exclude_modules: Optional[Union[List[str], str]] = None  # names of modules to exclude from LoRA
    lora_dropout: float = 0.0  # dropout probability on the LoRA path
    fan_in_fan_out: bool = False  # set True if the layer stores weight as (fan_in, fan_out)
    bias: Literal["none", "all", "lora_only"] = "none"  # which bias parameters to train
    use_rslora: bool = False  # whether to use rank-stabilized LoRA
    modules_to_save: Optional[List[str]] = None  # modules besides LoRA layers kept trainable
    # Weight-initialization scheme; True selects the default initialization.
    init_lora_weights: Union[bool, Literal["gaussian", "eva", "olora", "pissa", "corda", "loftq", "orthogonal"]] = True
    layers_to_transform: Optional[Union[List[int], int]] = None  # layer indices to apply LoRA to
    layers_pattern: Optional[str] = None  # pattern used to match layer names
    rank_pattern: Optional[dict] = None  # per-layer-name override of the rank
    alpha_pattern: Optional[dict] = None  # per-layer-name override of alpha
    megatron_config: Optional[dict] = None  # Megatron-specific configuration
    megatron_core: Optional[str] = None  # Megatron core module version
    loftq_config: Optional[LoftQConfig] = None  # LoftQ initialization configuration
    use_dora: bool = False  # whether to use DoRA (Weight-Decomposed LoRA)
    layer_replication: Optional[List[Tuple[int, int]]] = None  # layer replication for parameter sharing
    runtime_config: Optional[LoraRuntimeConfig] = None  # runtime configuration for LoRA
    eva_config: Optional[EvaConfig] = None  # EVA initialization configuration
    # Additional parameters for specific use cases.
    target_parameters: Optional[List[str]] = None  # parameters to target instead of modules
class LoraModel:
    """LoRA model implementation wrapping a base model with LoRA adapters."""

    def __init__(self, model, config: LoraConfig, adapter_name: str): ...
class LoraRuntimeConfig:
"""Runtime configuration for LoRA that can be changed during inference."""
def __init__(
self,
ephemeral_gpu_offload: bool = False,
**kwargs
): ...Adaptive LoRA that dynamically allocates parameter budget across weight matrices based on importance scores.
class AdaLoraConfig(PeftConfig):
    """Configuration for AdaLoRA (Adaptive LoRA)."""

    def __init__(
        self,
        target_r: int = 8,
        init_r: int = 12,
        tinit: int = 0,
        tfinal: int = 0,
        deltaT: int = 1,
        beta1: float = 0.85,
        beta2: float = 0.85,
        orth_reg_weight: float = 0.5,
        total_step: Optional[int] = None,
        rank_pattern: Optional[dict] = None,
        **kwargs,
    ):
        """
        Args:
            target_r: Target average rank of the incremental matrix.
            init_r: Initial rank for each incremental matrix.
            tinit: Number of warmup steps before rank reduction begins.
            tfinal: Final step of the rank-reduction schedule.
            deltaT: Step interval between rank-reduction updates.
            beta1: EMA hyperparameter for sensitivity smoothing.
            beta2: EMA hyperparameter for uncertainty quantification.
            orth_reg_weight: Weight of the orthogonal regularization term.
            total_step: Total training steps (for automatic scheduling).
            rank_pattern: Mapping from layer names to different target ranks.
        """
class AdaLoraModel:
"""AdaLoRA model implementation."""
def __init__(self, model, config: AdaLoraConfig, adapter_name: str): ...
def update_and_allocate(self, global_step: int): ...Alternative LoRA formulations that modify the decomposition or combination strategy.
class LoHaConfig(PeftConfig):
    """Configuration for LoHa (Low-Rank Hadamard Product)."""

    def __init__(
        self,
        r: int = 8,
        alpha: int = 8,
        rank_dropout: float = 0.0,
        module_dropout: float = 0.0,
        use_effective_conv2d: bool = False,
        **kwargs,
    ):
        """
        Args:
            r: LoHa rank.
            alpha: LoHa alpha scaling parameter.
            rank_dropout: Rank dropout probability.
            module_dropout: Module dropout probability.
            use_effective_conv2d: Use the parameter-effective decomposition for Conv2d.
        """
class LoHaModel:
    """LoHa model implementation."""

    def __init__(self, model, config: LoHaConfig, adapter_name: str): ...
class LoKrConfig(PeftConfig):
    """Configuration for LoKr (Low-Rank Kronecker Product)."""

    def __init__(
        self,
        r: int = 8,
        alpha: int = 8,
        rank_dropout: float = 0.0,
        module_dropout: float = 0.0,
        use_effective_conv2d: bool = False,
        decompose_both: bool = False,
        decompose_factor: int = -1,
        **kwargs,
    ):
        """
        Args:
            r: LoKr rank.
            alpha: LoKr alpha scaling parameter.
            rank_dropout: Rank dropout probability.
            module_dropout: Module dropout probability.
            use_effective_conv2d: Use the parameter-effective decomposition for Conv2d.
            decompose_both: Decompose both input and output dimensions.
            decompose_factor: Factor used for the matrix decomposition.
        """
class LoKrModel:
"""LoKr model implementation."""
def __init__(self, model, config: LoKrConfig, adapter_name: str): ...Specialized configurations and initialization methods for LoRA.
@dataclass
class LoftQConfig:
    """Configuration for LoftQ initialization."""

    loftq_bits: int = 4  # number of quantization bits for LoftQ
    loftq_iter: int = 1  # number of LoftQ alternating iterations
@dataclass
class EvaConfig:
    """Configuration for EVA (Eigenvalue Adaptation) initialization."""

    rho: float = 2.0  # rho value for EVA redistribution (>= 1.0)
    tau: float = 0.99  # cosine-similarity threshold for early stopping
    use_label_mask: bool = True  # use a label mask during EVA initialization
    label_mask_value: int = -100  # value marking ignored tokens to mask out
    whiten: bool = False  # apply whitening to the singular vectors
    adjust_scaling_factors: bool = True  # adjust scaling factors during EVA
class VBLoRAConfig(PeftConfig):
    """Configuration for VBLoRA (Variable Budget LoRA)."""

    def __init__(
        self,
        r: int = 8,  # rank
        lora_alpha: int = 8,  # scaling parameter
        target_modules: Optional[Union[List[str], str]] = None,  # modules to adapt
        lora_dropout: float = 0.0,  # dropout probability
        **kwargs,
    ): ...
class VBLoRAModel:
    """VBLoRA model implementation."""

    def __init__(self, model, config: VBLoRAConfig, adapter_name: str): ...
class RandLoraConfig(PeftConfig):
    """Configuration for RandLoRA (Randomized LoRA)."""

    def __init__(
        self,
        r: int = 8,  # rank
        lora_alpha: int = 8,  # scaling parameter
        target_modules: Optional[Union[List[str], str]] = None,  # modules to adapt
        lora_dropout: float = 0.0,  # dropout probability
        **kwargs,
    ): ...
class RandLoraModel:
"""RandLoRA model implementation."""
def __init__(self, model, config: RandLoraConfig, adapter_name: str): ...Utility functions for LoRA weight management and initialization.
def get_eva_state_dict(model, adapter_name: str = "default") -> dict:
    """Return the EVA state dictionary for a LoRA model.

    Args:
        model: PEFT model with EVA initialization.
        adapter_name: Name of the adapter to read.

    Returns:
        State dictionary containing the EVA weights.
    """
def initialize_lora_eva_weights(model, adapter_name: str = "default"):
    """Initialize a model's LoRA weights using the EVA method.

    Args:
        model: PEFT model whose weights will be initialized.
        adapter_name: Name of the adapter to initialize.
    """
def replace_lora_weights_loftq(
    peft_model,
    quantized_model,
    num_iter: int = 1,
    device: Optional[str] = None,
):
    """Replace LoRA weights with LoftQ initialization.

    Args:
        peft_model: PEFT model with LoRA adapters.
        quantized_model: Quantized base model.
        num_iter: Number of LoftQ iterations.
        device: Device on which to perform the computation.
    """

from transformers import AutoModelForCausalLM
from peft import get_peft_model, LoraConfig

model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-medium")

# Standard LoRA configuration
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=["c_attn", "c_proj"],
    lora_dropout=0.1,
    bias="none",
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(model, lora_config)

from peft import AdaLoraConfig
adalora_config = AdaLoraConfig(
    target_r=8,
    init_r=12,
    tinit=200,
    tfinal=1000,
    deltaT=10,
    beta1=0.85,
    beta2=0.85,
    orth_reg_weight=0.5,
    task_type="CAUSAL_LM",
)
peft_model = get_peft_model(model, adalora_config)

# During training, update the rank allocation each optimizer step.
# NOTE(review): the original example called
# `peft_model.peft_modules[adapter_name].update_and_allocate(global_step)`,
# but `adapter_name` was never defined and PEFT models expose no
# `peft_modules` mapping; the documented entry point is via `base_model`.
peft_model.base_model.update_and_allocate(global_step)

from peft import LoraConfig, LoftQConfig
loftq_config = LoftQConfig(loftq_bits=4, loftq_iter=1)
lora_config = LoraConfig(
r=16,
lora_alpha=32,
target_modules=["q_proj", "v_proj"],
loftq_config=loftq_config,
task_type="CAUSAL_LM"
)
peft_model = get_peft_model(quantized_model, lora_config)dora_config = LoraConfig(
r=8,
lora_alpha=16,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
use_dora=True, # Enable DoRA
task_type="CAUSAL_LM"
)
peft_model = get_peft_model(model, dora_config)LoRA variant using Hadamard products for improved expressiveness with fewer parameters.
class LoHaConfig(PeftConfig):
    """Configuration for LoHa (Low-Rank Adaptation with Hadamard Product)."""

    def __init__(
        self,
        r: int = 8,  # LoHa rank
        alpha: int = 8,  # scaling parameter
        target_modules: Optional[Union[List[str], str]] = None,  # modules to adapt
        exclude_modules: Optional[Union[List[str], str]] = None,  # modules to skip
        dropout: float = 0.0,  # dropout probability
        modules_to_save: Optional[List[str]] = None,  # extra modules kept trainable
        **kwargs,
    ): ...
class LoHaModel:
"""LoHa model implementation."""
def __init__(self, model, config: LoHaConfig, adapter_name: str): ...Usage:
loha_config = LoHaConfig(
r=8,
alpha=16,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
dropout=0.1,
task_type="CAUSAL_LM"
)
peft_model = get_peft_model(model, loha_config)LoRA variant using Kronecker products for structured low-rank decomposition.
class LoKrConfig(PeftConfig):
    """Configuration for LoKr (Low-Rank Adaptation with Kronecker Product)."""

    def __init__(
        self,
        r: int = 8,  # LoKr rank
        alpha: int = 8,  # scaling parameter
        target_modules: Optional[Union[List[str], str]] = None,  # modules to adapt
        exclude_modules: Optional[Union[List[str], str]] = None,  # modules to skip
        dropout: float = 0.0,  # dropout probability
        modules_to_save: Optional[List[str]] = None,  # extra modules kept trainable
        decompose_both: bool = False,  # decompose both input and output dimensions
        decompose_factor: int = -1,  # factor for the matrix decomposition
        **kwargs,
    ): ...
class LoKrModel:
"""LoKr model implementation."""
def __init__(self, model, config: LoKrConfig, adapter_name: str): ...Usage:
lokr_config = LoKrConfig(
r=8,
alpha=16,
target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
decompose_both=True,
decompose_factor=8,
task_type="CAUSAL_LM"
)
peft_model = get_peft_model(model, lokr_config)Configuration class for LoftQ initialization used with LoRA methods.
class LoftQConfig:
"""Configuration for LoftQ (LoRA-Fine-Tuning-aware Quantization)."""
def __init__(
self,
loftq_bits: int = 4,
loftq_iter: int = 1,
fake_quant: bool = True,
**kwargs
):
"""
Args:
loftq_bits: Number of bits for quantization
loftq_iter: Number of alternating steps
fake_quant: Whether to use fake quantization
"""Runtime configuration for LoRA that can be modified during inference.
class LoraRuntimeConfig:
"""Runtime configuration for LoRA."""
def __init__(
self,
ephemeral_gpu_offload: bool = False,
**kwargs
):
"""
Args:
ephemeral_gpu_offload: Whether to use ephemeral GPU offloading
"""Install with Tessl CLI
npx tessl i tessl/pypi-peft