A fast library for automated machine learning and tuning
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Automated online learning system using Vowpal Wabbit with multiple model management, adaptive resource allocation, and real-time model selection. The online learning module is designed for streaming data scenarios where models need to continuously adapt to new information.
Main class for automated online learning with Vowpal Wabbit, managing multiple models simultaneously and selecting the best performer dynamically.
class AutoVW:
    """Automated online learning with Vowpal Wabbit.

    Maintains multiple 'live' VW models simultaneously and dynamically
    selects the best performer for prediction.
    """

    # Number of warm-start samples for newly created models.
    WARMSTART_NUM = 100
    # Sentinel identifying configuration values chosen automatically.
    AUTOMATIC = "_auto"
    # Name of the VW argument controlling namespace interactions.
    VW_INTERACTION_ARG_NAME = "interactions"

    def __init__(self, max_live_model_num, search_space, init_config={},
                 min_resource_lease="auto", automl_runner_args={}, scheduler_args={},
                 model_select_policy="threshold_loss_ucb", metric="mae_clipped",
                 random_seed=None, model_selection_mode="min", cb_coef=None):
        """Initialize AutoVW for automated online learning.

        Args:
            max_live_model_num (int): Maximum number of 'live' models to maintain.
            search_space (dict): Hyperparameter search space including both
                tunable and fixed hyperparameters.
            init_config (dict): Initial partial or full configuration.
                NOTE(review): the mutable `{}` defaults mirror the upstream
                FLAML signature; do not rely on sharing them across instances.
            min_resource_lease (str or float): Minimum resource lease for
                models ('auto' or a float).
            automl_runner_args (dict): Configuration for OnlineTrialRunner.
            scheduler_args (dict): Configuration for the scheduler.
            model_select_policy (str): Model selection policy
                ('threshold_loss_ucb', etc.).
            metric (str): Loss metric ('mae_clipped', 'mae', 'mse',
                'absolute_loss').
            random_seed (int): Random seed for reproducibility.
            model_selection_mode (str): Optimization mode ('min' or 'max').
            cb_coef (float): Sample-complexity (confidence) bound coefficient.
        """

    def predict(self, data_sample):
        """Make a prediction on a data sample.

        Args:
            data_sample: Input data sample in VW format or structured format.

        Returns:
            Prediction value from the currently selected model.
        """

    def learn(self, data_sample):
        """Update the live models with a new data sample.

        Args:
            data_sample: Training data sample with features and label.
        """

# Individual Vowpal Wabbit trial representing a single model configuration.
class VowpalWabbitTrial:
    """Individual VW model trial in the online learning system.

    Manages a single VW model instance with a specific hyperparameter
    configuration.
    """

# Manages execution and coordination of multiple online learning trials.
class OnlineTrialRunner:
    """Manages execution of online learning trials.

    Coordinates multiple VW models and handles resource allocation
    among them.
    """

def get_ns_feature_dim_from_vw_example(vw_example):
    """Extract namespace feature dimensions from a VW example.

    Args:
        vw_example (str): Vowpal Wabbit format example string.

    Returns:
        dict: Mapping from namespace to its feature dimension.
    """

from flaml import AutoVW
# Define search space for hyperparameters.
search_space = {
    "learning_rate": {"_type": "loguniform", "_value": [0.001, 1.0]},
    "l1": {"_type": "loguniform", "_value": [1e-10, 1.0]},
    "l2": {"_type": "loguniform", "_value": [1e-10, 1.0]},
    "interactions": {"_type": "choice", "_value": [set(), {"ab"}, {"ac"}, {"ab", "ac"}]},
}

# Initialize AutoVW.
autovw = AutoVW(
    max_live_model_num=5,
    search_space=search_space,
    init_config={"learning_rate": 0.1},
    metric="mae_clipped",
    random_seed=42,
)

# Simulate streaming data.
# NOTE(review): `streaming_data` is assumed to be an iterable of VW-format
# samples supplied by the caller — it is not defined in this example.
for i, data_sample in enumerate(streaming_data):
    # Predict before learning (progressive validation).
    prediction = autovw.predict(data_sample)
    # Update models with the new sample.
    autovw.learn(data_sample)
    if i % 1000 == 0:
        print(f"Processed {i} samples, latest prediction: {prediction}")

from flaml import AutoVW
# Advanced search space with multiple hyperparameters.
search_space = {
    "learning_rate": {"_type": "loguniform", "_value": [0.0001, 1.0]},
    "power_t": {"_type": "uniform", "_value": [0.0, 1.0]},
    "l1": {"_type": "loguniform", "_value": [1e-10, 1.0]},
    "l2": {"_type": "loguniform", "_value": [1e-10, 1.0]},
    "interactions": {"_type": "choice", "_value": [
        set(), {"ab"}, {"ac"}, {"bc"}, {"ab", "ac"}, {"ab", "bc"}, {"ac", "bc"}
    ]},
    "bit_precision": {"_type": "choice", "_value": [18, 20, 22, 24]},
}

# Custom runner and scheduler arguments.
automl_runner_args = {
    "champion_test_policy": "loss_ucb",
    "remove_worse": True,
}
scheduler_args = {
    "resource_dimension": "sample_size",
    "max_resource": 10000,
    "reduction_factor": 2,
}

# Initialize with the advanced configuration.
autovw = AutoVW(
    max_live_model_num=10,
    search_space=search_space,
    init_config={"learning_rate": 0.05, "l1": 1e-6},
    min_resource_lease=100,
    automl_runner_args=automl_runner_args,
    scheduler_args=scheduler_args,
    model_select_policy="threshold_loss_ucb",
    metric="mae",  # Mean absolute error
    cb_coef=0.1,  # Confidence bound coefficient
    random_seed=123,
)

import pandas as pd
from flaml import AutoVW

# Search space for a regression task.
search_space = {
    "learning_rate": {"_type": "loguniform", "_value": [0.001, 0.5]},
    "l1": {"_type": "loguniform", "_value": [1e-8, 0.1]},
    "l2": {"_type": "loguniform", "_value": [1e-8, 0.1]},
}

autovw = AutoVW(
    max_live_model_num=3,
    search_space=search_space,
    metric="mse",
    model_selection_mode="min",
)

def process_csv_stream(csv_file):
    """Stream a CSV file in chunks and yield (prediction, actual) pairs."""
    for chunk in pd.read_csv(csv_file, chunksize=1000):
        for _, row in chunk.iterrows():
            # Convert to VW format: label |features feature1:value1 feature2:value2
            vw_sample = f"{row['target']} |features "
            vw_sample += " ".join([f"{col}:{row[col]}" for col in chunk.columns if col != 'target'])
            # Get the prediction before updating (progressive validation).
            pred = autovw.predict(vw_sample)
            # Update the model.
            autovw.learn(vw_sample)
            yield pred, row['target']

# Use with streaming data.
predictions_and_actuals = list(process_csv_stream("streaming_data.csv"))

from flaml import AutoVW
# Search space for multi-class classification.
search_space = {
    "learning_rate": {"_type": "loguniform", "_value": [0.01, 1.0]},
    "oaa": {"_type": "choice", "_value": [3, 5, 10]},  # One-Against-All class counts
    "loss_function": {"_type": "choice", "_value": ["logistic", "hinge"]},
}

# Initialize for classification.
autovw_classifier = AutoVW(
    max_live_model_num=4,
    search_space=search_space,
    init_config={"oaa": 3},
    metric="absolute_loss",
    random_seed=456,
)

# Example with categorical features.
def create_vw_multiclass_sample(features, label):
    """Convert features to VW multi-class format."""
    vw_line = f"{label} |features "
    for key, value in features.items():
        if isinstance(value, str):
            # Categorical feature: one-hot encode as name_value:1.
            vw_line += f"{key}_{value}:1 "
        else:
            # Numerical feature.
            vw_line += f"{key}:{value} "
    return vw_line.strip()

# Process multi-class data.
sample_features = {"age": 25, "category": "A", "score": 0.8}
sample_label = 2  # Class label
vw_sample = create_vw_multiclass_sample(sample_features, sample_label)
prediction = autovw_classifier.predict(vw_sample)
autovw_classifier.learn(vw_sample)

from flaml import AutoVW
# Search space for contextual bandits.
search_space = {
    "learning_rate": {"_type": "loguniform", "_value": [0.001, 0.1]},
    "cb_explore_adf": {"_type": "choice", "_value": [True]},
    "epsilon": {"_type": "uniform", "_value": [0.01, 0.3]},
}

# Initialize for contextual bandit learning.
autovw_cb = AutoVW(
    max_live_model_num=5,
    search_space=search_space,
    metric="cb_loss",
    model_selection_mode="min",
)

def create_cb_sample(context, action, cost, probability):
    """Create contextual bandit VW format sample."""
    # Format: cost:probability:action |context features
    vw_line = f"{cost}:{probability}:{action} |context "
    vw_line += " ".join([f"{k}:{v}" for k, v in context.items()])
    return vw_line

# Example contextual bandit interaction.
context = {"user_age": 30, "day_of_week": 2, "weather": 1}
action = 1  # Action taken
cost = 0.5  # Cost observed (lower is better)
probability = 0.2  # Probability of taking this action
cb_sample = create_cb_sample(context, action, cost, probability)
autovw_cb.learn(cb_sample)

# For prediction, provide the context without action/cost.
prediction_context = "1 |context user_age:25 day_of_week:3 weather:0"
predicted_action = autovw_cb.predict(prediction_context)

# Lower-level components for managing individual Vowpal Wabbit trials and online trial execution.
class VowpalWabbitTrial:
    """A single Vowpal Wabbit model trial with a fixed hyperparameter set."""

    def __init__(self, config, trial_id=None):
        """Create the trial.

        Args:
            config (dict): VW hyperparameter configuration for this trial.
            trial_id (str): Optional unique trial identifier.
        """

    def train_eval(self, data_sample, eval_only=False):
        """Evaluate, and optionally train, the model on one data sample.

        Args:
            data_sample (str): VW-formatted data sample.
            eval_only (bool): When True, evaluate without updating the model.

        Returns:
            dict: Performance metrics.
        """

    def predict(self, data_sample):
        """Return the model's prediction for a VW-formatted data sample."""

    @property
    def config(self):
        """dict: Hyperparameter configuration of this trial."""

    @property
    def trial_id(self):
        """str: Unique identifier of this trial."""
class OnlineTrialRunner:
    """Manager for running multiple online learning trials."""

    def __init__(self, search_space, max_live_model_num=5, **kwargs):
        """Initialize the online trial runner.

        Args:
            search_space (dict): Hyperparameter search space.
            max_live_model_num (int): Maximum number of concurrent models.
            **kwargs: Additional configuration options.
        """

    def step(self, data_sample):
        """Process one data sample across all active trials.

        Args:
            data_sample (str): VW-formatted data sample.

        Returns:
            dict: Aggregated results from all trials.
        """

    def get_best_trial(self):
        """Get the currently best performing trial."""

    def suggest_trial(self):
        """Suggest a new trial configuration."""

    def remove_trial(self, trial_id):
        """Remove the trial with the given id from the active set."""

# Install with Tessl CLI
npx tessl i tessl/pypi-flaml