Parallel scripting library for executing workflows across diverse computing resources
—
Parsl resource providers interface with various computing platforms, job schedulers, and cloud services to provision and manage computing resources for parallel execution. Each provider is specialized for specific resource types and management systems.
Executes tasks on the local machine using multiple processes. Ideal for development, testing, and small-scale parallel workloads.
class LocalProvider:
def __init__(self, channel=None, nodes_per_block=1, init_blocks=1,
min_blocks=0, max_blocks=1, parallelism=1, walltime="00:10:00",
cmd_timeout=30, launcher=None, move_files=None, worker_init=''):
"""
Local execution provider for running tasks on local machine.
Parameters:
- nodes_per_block: Processes per block (default: 1)
- init_blocks: Initial number of blocks (default: 1)
- min_blocks: Minimum blocks to maintain (default: 0)
- max_blocks: Maximum blocks allowed (default: 1)
- parallelism: Provider parallelism level (default: 1)
- walltime: Maximum block runtime (default: "00:10:00")
- launcher: Task launcher (default: SimpleLauncher)
- worker_init: Initialization commands for workers
"""Usage Example:
from parsl.providers import LocalProvider
from parsl.executors import HighThroughputExecutor
local_htex = HighThroughputExecutor(
label='local_parallel',
cores_per_worker=2,
max_workers=4,
provider=LocalProvider(
init_blocks=1,
max_blocks=2,
nodes_per_block=1,
walltime="01:00:00"
)
)

Interfaces with SLURM workload manager for job submission and resource management on HPC clusters.
class SlurmProvider:
def __init__(self, partition=None, account=None, channel=None, nodes_per_block=1,
cores_per_node=None, mem_per_node=None, init_blocks=1, min_blocks=0,
max_blocks=1, parallelism=1, walltime="00:10:00", scheduler_options='',
worker_init='', cmd_timeout=30, launcher=None, move_files=None,
exclusive=True, qos=None):
"""
SLURM execution provider for HPC cluster job submission.
Parameters:
- partition: SLURM partition name
- account: SLURM account for billing
- nodes_per_block: Nodes per resource block
- cores_per_node: CPU cores per node
- mem_per_node: Memory per node (e.g., '4GB', '1000MB')
- walltime: Job walltime limit (HH:MM:SS format)
- scheduler_options: Additional SLURM directives
- exclusive: Request exclusive node access (default: True)
- qos: Quality of Service specification
- launcher: Job launcher (default: SrunLauncher)
"""Usage Example:
from parsl.providers import SlurmProvider
from parsl.launchers import SrunLauncher
slurm_provider = SlurmProvider(
partition='compute',
account='my_project',
nodes_per_block=2,
cores_per_node=24,
mem_per_node='100GB',
init_blocks=1,
max_blocks=10,
walltime='04:00:00',
scheduler_options='#SBATCH --constraint=haswell',
launcher=SrunLauncher()
)

Provisions and manages Amazon EC2 instances for cloud-based parallel computing.
class AWSProvider:
def __init__(self, image_id, instance_type, region='us-east-1', key_name=None,
security_groups=None, subnet_id=None, iam_instance_profile_arn=None,
iam_instance_profile_name=None, state_file=None, spot_max_bid=0,
nodes_per_block=1, init_blocks=1, min_blocks=0, max_blocks=1,
parallelism=1, walltime="00:10:00", launcher=None, worker_init=''):
"""
AWS EC2 provider for cloud computing resources.
Parameters:
- image_id: EC2 AMI ID
- instance_type: EC2 instance type (e.g., 't3.medium', 'c5.xlarge')
- region: AWS region (default: 'us-east-1')
- key_name: EC2 key pair name for SSH access
- security_groups: List of security group names
- subnet_id: VPC subnet ID
- spot_max_bid: Maximum bid for spot instances (0 for on-demand)
- state_file: File to store instance state information
"""Usage Example:
from parsl.providers import AWSProvider
aws_provider = AWSProvider(
image_id='ami-0abcdef1234567890',
instance_type='c5.2xlarge',
region='us-west-2',
key_name='my-keypair',
security_groups=['parsl-sg'],
spot_max_bid=0.10, # $0.10/hour max for spot instances
init_blocks=1,
max_blocks=5,
nodes_per_block=1
)

Manages containerized workloads on Kubernetes clusters with automatic scaling and resource management.
class KubernetesProvider:
def __init__(self, namespace='default', image=None, nodes_per_block=1,
init_blocks=1, min_blocks=0, max_blocks=1, max_cpu=1, max_mem="1Gi",
parallelism=1, worker_init='', pod_name=None, user_id=None,
group_id=None, run_as_non_root=False, persistent_volumes=None,
secret=None, incluster_config=True):
"""
Kubernetes provider for container orchestration.
Parameters:
- namespace: Kubernetes namespace (default: 'default')
- image: Container image for workers
- max_cpu: CPU limit per pod (default: 1)
- max_mem: Memory limit per pod (default: "1Gi")
- persistent_volumes: List of persistent volume configurations
- secret: Kubernetes secret for authentication
- incluster_config: Use in-cluster config (default: True)
- run_as_non_root: Run containers as non-root user
"""Usage Example:
from parsl.providers import KubernetesProvider
k8s_provider = KubernetesProvider(
namespace='parsl-workflows',
image='python:3.9-slim',
max_cpu=2,
max_mem='4Gi',
init_blocks=1,
max_blocks=10,
persistent_volumes=[{
'name': 'shared-data',
'mount_path': '/data',
'claim_name': 'shared-pvc'
}]
)

Manages Google Compute Engine instances for cloud-based parallel execution.
class GoogleCloudProvider:
def __init__(self, project_id, zone, machine_type='n1-standard-1',
image_id=None, disk_size_gb=10, nodes_per_block=1,
init_blocks=1, min_blocks=0, max_blocks=1, parallelism=1,
walltime="00:10:00", launcher=None, worker_init=''):
"""
Google Cloud Platform provider for compute resources.
Parameters:
- project_id: GCP project ID
- zone: GCP zone (e.g., 'us-central1-a')
- machine_type: Instance machine type (e.g., 'n1-standard-4')
- image_id: VM image family or specific image
- disk_size_gb: Boot disk size in GB (default: 10)
"""Usage Example:
from parsl.providers import GoogleCloudProvider
gcp_provider = GoogleCloudProvider(
project_id='my-gcp-project',
zone='us-central1-a',
machine_type='n1-standard-4',
image_id='projects/ubuntu-os-cloud/global/images/family/ubuntu-2004-lts',
disk_size_gb=50,
init_blocks=1,
max_blocks=8
)

Provisions and manages Microsoft Azure virtual machines for cloud computing.
class AzureProvider:
def __init__(self, vm_size='Standard_D1_v2', region='eastus',
image=None, nodes_per_block=1, init_blocks=1, min_blocks=0,
max_blocks=1, parallelism=1, walltime="00:10:00",
launcher=None, worker_init=''):
"""
Microsoft Azure provider for cloud computing resources.
Parameters:
- vm_size: Azure VM size (e.g., 'Standard_D4_v3')
- region: Azure region (default: 'eastus')
- image: VM image configuration dict
"""Additional providers for common HPC job schedulers:
class LSFProvider:
"""IBM LSF (Load Sharing Facility) provider."""
def __init__(self, queue=None, account=None, project=None, ...): ...
class PBSProProvider:
"""PBS Professional scheduler provider."""
def __init__(self, queue=None, account=None, nodes_per_block=1, ...): ...
class TorqueProvider:
"""TORQUE resource manager provider."""
def __init__(self, queue=None, nodes_per_block=1, ...): ...
class CondorProvider:
"""HTCondor high-throughput computing provider."""
def __init__(self, nodes_per_block=1, init_blocks=1, ...): ...
class GridEngineProvider:
"""Sun/Oracle Grid Engine provider."""
def __init__(self, queue=None, nodes_per_block=1, ...): ...

HPC Provider Examples:
from parsl.providers import LSFProvider, PBSProProvider
from parsl.launchers import JsrunLauncher, MpiRunLauncher
# IBM LSF on Summit supercomputer
lsf_provider = LSFProvider(
queue='batch',
account='project123',
nodes_per_block=2,
walltime='01:00:00',
launcher=JsrunLauncher()
)
# PBS Pro cluster
pbs_provider = PBSProProvider(
queue='normal',
account='allocation123',
nodes_per_block=4,
cores_per_node=28,
walltime='02:00:00',
launcher=MpiRunLauncher()
)

Use multiple providers for different types of workloads:
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
config = Config(executors=[
# Local development and testing
HighThroughputExecutor(
label='local_dev',
max_workers=4,
provider=LocalProvider(max_blocks=1)
),
# HPC cluster for compute-intensive tasks
HighThroughputExecutor(
label='hpc_cluster',
max_workers=100,
provider=SlurmProvider(
partition='compute',
nodes_per_block=4,
max_blocks=25,
walltime='04:00:00'
)
),
# Cloud bursting for overflow capacity
HighThroughputExecutor(
label='cloud_burst',
max_workers=50,
provider=AWSProvider(
instance_type='c5.xlarge',
max_blocks=10,
spot_max_bid=0.20
)
)
])

Configure automatic resource scaling based on workload:
# Aggressive scaling for burst workloads
burst_provider = SlurmProvider(
partition='burst',
init_blocks=0, # Start with no resources
min_blocks=0, # Scale down to zero when idle
max_blocks=50, # Scale up to 50 blocks
parallelism=0.8, # Submit 80% of pending tasks as blocks
walltime='00:30:00' # Short walltime for responsiveness
)
# Conservative scaling for long-running workflows
steady_provider = SlurmProvider(
partition='normal',
init_blocks=2, # Always maintain 2 blocks
min_blocks=2, # Never scale below 2 blocks
max_blocks=10, # Conservative maximum
parallelism=0.3, # Conservative submission rate
walltime='12:00:00' # Long walltime for efficiency
)

Handle provider-specific errors and resource failures:
from parsl.providers.errors import ProviderException, ScaleOutFailed
try:
parsl.load(config)
except ProviderException as e:
print(f"Provider initialization failed: {e}")
except ScaleOutFailed as e:
print(f"Resource scaling failed: {e}")
# Monitor provider status
for executor in parsl.dfk().executors.values():
provider = executor.provider
print(f"Provider {provider.label}: {provider.resources}")Install with Tessl CLI
npx tessl i tessl/pypi-parsl