Collection of common Python utilities for machine learning and scientific computing workflows.
—
Absl flags and application utilities for building command-line applications with dataclass-based flag parsing, enhanced logging, and streamlined application development workflows.
Create command-line flag parsers from dataclasses for type-safe argument handling.
def make_flags_parser(
    dataclass_cls: type,
    prefix: str = "",
    exclude_fields: set[str] | None = None
) -> Callable[[list[str]], Any]:
    """Create a flags parser from a dataclass definition.

    Each dataclass field becomes a command-line flag; field defaults are
    used when the corresponding flag is absent.

    Args:
        dataclass_cls: Dataclass to create flags from.
        prefix: Prefix for flag names (e.g., "model_" for --model_param).
        exclude_fields: Set of field names to exclude from flags.

    Returns:
        Parser function that takes command-line arguments and returns a
        populated dataclass instance.

    Example:
        @dataclass
        class Config:
            learning_rate: float = 0.01
            batch_size: int = 32

        parser = make_flags_parser(Config)
        config = parser(sys.argv[1:])  # Parse from command line
    """

Improved logging configuration for better development and production logging.
def better_logging(
    level: str = 'INFO',
    format_string: str | None = None,
    include_timestamp: bool = True,
    include_level: bool = True,
    include_module: bool = False,
    colored_output: bool = True
) -> None:
    """Configure enhanced logging with better defaults.

    Args:
        level: Logging level ('DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL').
        format_string: Custom format string for log messages; when given it
            presumably overrides the include_* toggles — confirm in eapp docs.
        include_timestamp: Include timestamp in log messages.
        include_level: Include log level in messages.
        include_module: Include module name in messages.
        colored_output: Use colored output for different log levels.

    Example:
        better_logging('DEBUG', colored_output=True)
        logging.info("This will be nicely formatted")
    """

from etils import eapp
from dataclasses import dataclass
from typing import Optional
import sys
@dataclass
class TrainingConfig:
    """Configuration for model training.

    Every field below becomes a command-line flag via
    eapp.make_flags_parser; the defaults apply when a flag is omitted.
    """

    learning_rate: float = 0.001
    batch_size: int = 32
    num_epochs: int = 100
    model_name: str = "resnet50"
    data_path: str = "/data/train"
    output_dir: str = "./outputs"
    use_gpu: bool = True
    seed: Optional[int] = None  # None presumably means "do not seed" — confirm in training code
# Create parser from dataclass
parser = eapp.make_flags_parser(TrainingConfig)

# Parse command line arguments, e.g.:
#   python script.py --learning_rate=0.01 --batch_size=64 --num_epochs=50
config = parser(sys.argv[1:])

print(f"Training with learning rate: {config.learning_rate}")
print(f"Batch size: {config.batch_size}")
print(f"Number of epochs: {config.num_epochs}")

from etils import eapp
from dataclasses import dataclass, field
from typing import List, Dict, Optional
import json
@dataclass
class ModelConfig:
    """Model architecture configuration."""

    # Mutable default uses default_factory to avoid the shared-list pitfall.
    layers: List[int] = field(default_factory=lambda: [64, 128, 256])
    dropout_rate: float = 0.1
    activation: str = "relu"
    use_batch_norm: bool = True
@dataclass
class DataConfig:
    """Data processing configuration."""

    input_size: tuple[int, int] = (224, 224)
    normalize: bool = True
    augmentation: bool = False
    num_workers: int = 4
@dataclass
class ExperimentConfig:
    """Complete experiment configuration.

    The nested ``model`` and ``data`` sections are excluded from the main
    flag parser below and populated by their own prefixed parsers.
    """

    experiment_name: str = "default_experiment"
    # Nested configs use default_factory so each instance gets its own copy.
    model: ModelConfig = field(default_factory=ModelConfig)
    data: DataConfig = field(default_factory=DataConfig)
    # Training parameters
    learning_rate: float = 0.001
    weight_decay: float = 1e-4
    max_epochs: int = 100
    # System configuration
    device: str = "cuda"
    precision: str = "16-mixed"
    # Optional parameters
    checkpoint_path: Optional[str] = None
    resume_from: Optional[str] = None
    # Advanced options
    hyperparams: Dict[str, float] = field(default_factory=dict)
# Create parsers with prefixes for nested configs
# (e.g. ModelConfig.layers is exposed as --model_layers).
model_parser = eapp.make_flags_parser(ModelConfig, prefix="model_")
data_parser = eapp.make_flags_parser(DataConfig, prefix="data_")
main_parser = eapp.make_flags_parser(
    ExperimentConfig,
    exclude_fields={"model", "data"}  # Handle these separately
)
def parse_experiment_config(args: List[str]) -> ExperimentConfig:
    """Assemble a complete ExperimentConfig from command-line arguments.

    Top-level fields come from ``main_parser``; the nested ``model`` and
    ``data`` sections are parsed from the same argument list by their
    prefixed parsers and attached afterwards.
    """
    experiment = main_parser(args)
    experiment.model = model_parser(args)
    experiment.data = data_parser(args)
    return experiment
# Command line usage:
#   python train.py --experiment_name=vision_v1 --learning_rate=0.01 \
#     --model_layers="[128,256,512]" --data_input_size="(256,256)"
config = parse_experiment_config(sys.argv[1:])

from etils import eapp
import logging
# Basic enhanced logging
eapp.better_logging('INFO')

# Development logging with debug information
eapp.better_logging(
    level='DEBUG',
    include_module=True,
    colored_output=True
)

# Production logging with structured format
eapp.better_logging(
    level='WARNING',
    format_string='%(asctime)s [%(levelname)s] %(name)s: %(message)s',
    colored_output=False
)

# Custom format for specific use case
eapp.better_logging(
    level='INFO',
    format_string='[%(levelname)s] %(funcName)s:%(lineno)d - %(message)s',
    include_timestamp=True,
    colored_output=True
)

# Use the configured logging.
# NOTE(review): each better_logging call presumably replaces the previous
# global configuration, so only the last one above is in effect — confirm.
logging.info("Application started")
logging.debug("Debug information")
logging.warning("Warning message")
logging.error("Error occurred")

from etils import eapp
from dataclasses import dataclass, field
from typing import List, Optional
import logging
import sys
@dataclass
class DatabaseConfig:
    """Database connection configuration."""

    host: str = "localhost"
    port: int = 5432
    database: str = "myapp"
    username: str = "user"
    password: str = ""  # empty by default — expected to be supplied via --db_password
    pool_size: int = 10
@dataclass
class ServerConfig:
    """Server configuration."""

    host: str = "0.0.0.0"  # bind on all interfaces by default
    port: int = 8000
    workers: int = 4
    reload: bool = False
    debug: bool = False  # also controls logging verbosity in main() below
@dataclass
class AppConfig:
    """Main application configuration."""

    # Application settings
    app_name: str = "MyApp"
    version: str = "1.0.0"
    log_level: str = "INFO"
    # Component configurations (parsed separately with db_/server_ prefixes)
    database: DatabaseConfig = field(default_factory=DatabaseConfig)
    server: ServerConfig = field(default_factory=ServerConfig)
    # Feature flags
    enable_metrics: bool = True
    enable_tracing: bool = False
    # Optional settings
    config_file: Optional[str] = None
    secrets_file: Optional[str] = None
def create_app_parser() -> callable:
    """Build a parser that assembles a complete AppConfig from CLI args.

    Returns a function mapping an argument list to an AppConfig whose
    ``database`` and ``server`` sections are parsed by prefixed parsers.
    """
    # One parser per component; the main parser skips the nested fields.
    db_parser = eapp.make_flags_parser(DatabaseConfig, prefix="db_")
    server_parser = eapp.make_flags_parser(ServerConfig, prefix="server_")
    app_parser = eapp.make_flags_parser(
        AppConfig,
        exclude_fields={"database", "server"}
    )

    def parse_args(args: List[str]) -> AppConfig:
        """Parse all application arguments into a single AppConfig."""
        cfg = app_parser(args)
        cfg.database = db_parser(args)
        cfg.server = server_parser(args)
        return cfg

    return parse_args
def main():
    """Main application entry point.

    Parses configuration from the command line, configures logging, and
    starts the database and server components.
    """
    # Parse configuration
    parser = create_app_parser()
    config = parser(sys.argv[1:])

    # Setup logging based on configuration; debug mode additionally turns
    # on module names and colored output.
    eapp.better_logging(
        level=config.log_level,
        include_module=config.server.debug,
        colored_output=config.server.debug
    )

    logging.info(f"Starting {config.app_name} v{config.version}")
    logging.info(f"Database: {config.database.host}:{config.database.port}")
    logging.info(f"Server: {config.server.host}:{config.server.port}")

    if config.enable_metrics:
        logging.info("Metrics collection enabled")
    if config.enable_tracing:
        logging.info("Distributed tracing enabled")

    # Start application components
    start_database(config.database)
    start_server(config.server)
def start_database(db_config: DatabaseConfig):
    """Initialize database connection (placeholder — only logs here)."""
    logging.info(f"Connecting to database: {db_config.database}")
    # Database initialization logic here
def start_server(server_config: ServerConfig):
    """Start the web server (placeholder — only logs here)."""
    logging.info(f"Starting server on {server_config.host}:{server_config.port}")
    # Server startup logic here
if __name__ == "__main__":
    # Command line usage:
    #   python app.py --log_level=DEBUG --db_host=prod-db --server_port=9000 \
    #     --server_workers=8 --enable_metrics=true
    main()

from etils import eapp
from dataclasses import dataclass
import absl.app
import absl.flags
import absl.logging
# Traditional Absl flags approach: module-level DEFINE_* calls register
# flags on the global FLAGS object; values are available after absl.app.run
# has parsed the command line.
FLAGS = absl.flags.FLAGS
absl.flags.DEFINE_string('model_path', '/models/default', 'Path to model')
absl.flags.DEFINE_float('threshold', 0.5, 'Classification threshold')
@dataclass
class ProcessingConfig:
    """Processing pipeline configuration."""

    batch_size: int = 100
    num_threads: int = 4
    output_format: str = "json"
    verbose: bool = False  # exposed as --proc_verbose via the prefixed parser below
# Combine traditional flags with dataclass parser
processing_parser = eapp.make_flags_parser(ProcessingConfig, prefix="proc_")
def main(argv):
    """Main function compatible with absl.app."""
    del argv  # Unused

    # Setup enhanced logging.
    # NOTE(review): FLAGS.proc_verbose assumes make_flags_parser registered
    # an absl flag named "proc_verbose" — confirm against eapp behavior.
    eapp.better_logging(
        level='DEBUG' if FLAGS.proc_verbose else 'INFO',
        colored_output=True
    )

    # Parse dataclass config from remaining flags
    import sys
    processing_config = processing_parser(sys.argv[1:])

    # Use both traditional flags and dataclass config
    absl.logging.info(f"Model path: {FLAGS.model_path}")
    absl.logging.info(f"Threshold: {FLAGS.threshold}")
    absl.logging.info(f"Batch size: {processing_config.batch_size}")
    absl.logging.info(f"Output format: {processing_config.output_format}")

    # Run processing pipeline
    run_pipeline(FLAGS.model_path, FLAGS.threshold, processing_config)
def run_pipeline(model_path: str, threshold: float, config: ProcessingConfig):
    """Run the processing pipeline (placeholder — only logs here)."""
    absl.logging.info("Starting pipeline...")
    # Pipeline logic here
if __name__ == '__main__':
    # absl.app.run parses the registered flags, then invokes main(argv).
    absl.app.run(main)

from etils import eapp
from dataclasses import dataclass, field
from typing import List
import logging
import sys
@dataclass
class ValidationConfig:
    """Configuration with validation rules.

    Invalid field values raise ValueError as soon as an instance is
    created, so a bad command line fails fast instead of surfacing
    mid-training.
    """

    learning_rate: float = 0.001
    batch_size: int = 32
    model_layers: List[int] = field(default_factory=lambda: [128, 64])

    def __post_init__(self):
        """Validate configuration after parsing."""
        # Table-driven checks; the first failing rule (in order) raises.
        rules = (
            (self.learning_rate > 0, "learning_rate must be positive"),
            (self.batch_size > 0, "batch_size must be positive"),
            (len(self.model_layers) > 0, "model_layers cannot be empty"),
            (all(size > 0 for size in self.model_layers),
             "All layer sizes must be positive"),
        )
        for passed, message in rules:
            if not passed:
                raise ValueError(message)
def main():
    """Main with configuration validation.

    Exits with status 1 on either a validation failure or any other
    parsing error.
    """
    eapp.better_logging('INFO')
    try:
        parser = eapp.make_flags_parser(ValidationConfig)
        config = parser(sys.argv[1:])
        # ValidationConfig.__post_init__ has already run by this point,
        # so reaching here means the parsed values passed validation.
        logging.info("Configuration validated successfully")
        logging.info(f"Learning rate: {config.learning_rate}")
        logging.info(f"Batch size: {config.batch_size}")
        logging.info(f"Model layers: {config.model_layers}")
    except ValueError as e:
        # Raised by ValidationConfig.__post_init__ on bad values.
        logging.error(f"Configuration validation failed: {e}")
        sys.exit(1)
    except Exception as e:
        # Any other parsing failure (unknown flags, bad types, ...).
        logging.error(f"Failed to parse configuration: {e}")
        sys.exit(1)
if __name__ == "__main__":
    main()

Install with Tessl CLI
npx tessl i tessl/pypi-etils