Parallel scripting library for executing workflows across diverse computing resources
—
Parsl resource providers interface with various computing platforms, job schedulers, and cloud services to provision and manage computing resources for parallel execution. Each provider is specialized for specific resource types and management systems.
Executes tasks on the local machine using multiple processes. Ideal for development, testing, and small-scale parallel workloads.
class LocalProvider:
def __init__(self, channel=None, nodes_per_block=1, init_blocks=1,
min_blocks=0, max_blocks=1, parallelism=1, walltime="00:10:00",
cmd_timeout=30, launcher=None, move_files=None, worker_init=''):
"""
Local execution provider for running tasks on local machine.
Parameters:
- nodes_per_block: Processes per block (default: 1)
- init_blocks: Initial number of blocks (default: 1)
- min_blocks: Minimum blocks to maintain (default: 0)
- max_blocks: Maximum blocks allowed (default: 1)
- parallelism: Provider parallelism level (default: 1)
- walltime: Maximum block runtime (default: "00:10:00")
- launcher: Task launcher (default: SimpleLauncher)
- worker_init: Initialization commands for workers
"""Usage Example:
from parsl.providers import LocalProvider
from parsl.executors import HighThroughputExecutor
local_htex = HighThroughputExecutor(
label='local_parallel',
cores_per_worker=2,
max_workers=4,
provider=LocalProvider(
init_blocks=1,
max_blocks=2,
nodes_per_block=1,
walltime="01:00:00"
)
)

Interfaces with SLURM workload manager for job submission and resource management on HPC clusters.
class SlurmProvider:
def __init__(self, partition=None, account=None, channel=None, nodes_per_block=1,
cores_per_node=None, mem_per_node=None, init_blocks=1, min_blocks=0,
max_blocks=1, parallelism=1, walltime="00:10:00", scheduler_options='',
worker_init='', cmd_timeout=30, launcher=None, move_files=None,
exclusive=True, qos=None):
"""
SLURM execution provider for HPC cluster job submission.
Parameters:
- partition: SLURM partition name
- account: SLURM account for billing
- nodes_per_block: Nodes per resource block
- cores_per_node: CPU cores per node
- mem_per_node: Memory per node (e.g., '4GB', '1000MB')
- walltime: Job walltime limit (HH:MM:SS format)
- scheduler_options: Additional SLURM directives
- exclusive: Request exclusive node access (default: True)
- qos: Quality of Service specification
- launcher: Job launcher (default: SrunLauncher)
"""Usage Example:
from parsl.providers import SlurmProvider
from parsl.launchers import SrunLauncher
slurm_provider = SlurmProvider(
partition='compute',
account='my_project',
nodes_per_block=2,
cores_per_node=24,
mem_per_node='100GB',
init_blocks=1,
max_blocks=10,
walltime='04:00:00',
scheduler_options='#SBATCH --constraint=haswell',
launcher=SrunLauncher()
)

Provisions and manages Amazon EC2 instances for cloud-based parallel computing.
class AWSProvider:
def __init__(self, image_id, instance_type, region='us-east-1', key_name=None,
security_groups=None, subnet_id=None, iam_instance_profile_arn=None,
iam_instance_profile_name=None, state_file=None, spot_max_bid=0,
nodes_per_block=1, init_blocks=1, min_blocks=0, max_blocks=1,
parallelism=1, walltime="00:10:00", launcher=None, worker_init=''):
"""
AWS EC2 provider for cloud computing resources.
Parameters:
- image_id: EC2 AMI ID
- instance_type: EC2 instance type (e.g., 't3.medium', 'c5.xlarge')
- region: AWS region (default: 'us-east-1')
- key_name: EC2 key pair name for SSH access
- security_groups: List of security group names
- subnet_id: VPC subnet ID
- spot_max_bid: Maximum bid for spot instances (0 for on-demand)
- state_file: File to store instance state information
"""Usage Example:
from parsl.providers import AWSProvider
aws_provider = AWSProvider(
image_id='ami-0abcdef1234567890',
instance_type='c5.2xlarge',
region='us-west-2',
key_name='my-keypair',
security_groups=['parsl-sg'],
spot_max_bid=0.10, # $0.10/hour max for spot instances
init_blocks=1,
max_blocks=5,
nodes_per_block=1
)

Manages containerized workloads on Kubernetes clusters with automatic scaling and resource management.
class KubernetesProvider:
def __init__(self, namespace='default', image=None, nodes_per_block=1,
init_blocks=1, min_blocks=0, max_blocks=1, max_cpu=1, max_mem="1Gi",
parallelism=1, worker_init='', pod_name=None, user_id=None,
group_id=None, run_as_non_root=False, persistent_volumes=None,
secret=None, incluster_config=True):
"""
Kubernetes provider for container orchestration.
Parameters:
- namespace: Kubernetes namespace (default: 'default')
- image: Container image for workers
- max_cpu: CPU limit per pod (default: 1)
- max_mem: Memory limit per pod (default: "1Gi")
- persistent_volumes: List of persistent volume configurations
- secret: Kubernetes secret for authentication
- incluster_config: Use in-cluster config (default: True)
- run_as_non_root: Run containers as non-root user
"""Usage Example:
from parsl.providers import KubernetesProvider
k8s_provider = KubernetesProvider(
namespace='parsl-workflows',
image='python:3.9-slim',
max_cpu=2,
max_mem='4Gi',
init_blocks=1,
max_blocks=10,
persistent_volumes=[{
'name': 'shared-data',
'mount_path': '/data',
'claim_name': 'shared-pvc'
}]
)

Manages Google Compute Engine instances for cloud-based parallel execution.
class GoogleCloudProvider:
def __init__(self, project_id, zone, machine_type='n1-standard-1',
image_id=None, disk_size_gb=10, nodes_per_block=1,
init_blocks=1, min_blocks=0, max_blocks=1, parallelism=1,
walltime="00:10:00", launcher=None, worker_init=''):
"""
Google Cloud Platform provider for compute resources.
Parameters:
- project_id: GCP project ID
- zone: GCP zone (e.g., 'us-central1-a')
- machine_type: Instance machine type (e.g., 'n1-standard-4')
- image_id: VM image family or specific image
- disk_size_gb: Boot disk size in GB (default: 10)
"""Usage Example:
from parsl.providers import GoogleCloudProvider
gcp_provider = GoogleCloudProvider(
project_id='my-gcp-project',
zone='us-central1-a',
machine_type='n1-standard-4',
image_id='projects/ubuntu-os-cloud/global/images/family/ubuntu-2004-lts',
disk_size_gb=50,
init_blocks=1,
max_blocks=8
)

Provisions and manages Microsoft Azure virtual machines for cloud computing.
class AzureProvider:
def __init__(self, vm_size='Standard_D1_v2', region='eastus',
image=None, nodes_per_block=1, init_blocks=1, min_blocks=0,
max_blocks=1, parallelism=1, walltime="00:10:00",
launcher=None, worker_init=''):
"""
Microsoft Azure provider for cloud computing resources.
Parameters:
- vm_size: Azure VM size (e.g., 'Standard_D4_v3')
- region: Azure region (default: 'eastus')
- image: VM image configuration dict
"""Additional providers for common HPC job schedulers:
class LSFProvider:
"""IBM LSF (Load Sharing Facility) provider."""
def __init__(self, queue=None, account=None, project=None, ...): ...
class PBSProProvider:
"""PBS Professional scheduler provider."""
def __init__(self, queue=None, account=None, nodes_per_block=1, ...): ...
class TorqueProvider:
"""TORQUE resource manager provider."""
def __init__(self, queue=None, nodes_per_block=1, ...): ...
class CondorProvider:
"""HTCondor high-throughput computing provider."""
def __init__(self, nodes_per_block=1, init_blocks=1, ...): ...
class GridEngineProvider:
"""Sun/Oracle Grid Engine provider."""
def __init__(self, queue=None, nodes_per_block=1, ...): ...

HPC Provider Examples:
from parsl.providers import LSFProvider, PBSProProvider
from parsl.launchers import JsrunLauncher, MpiRunLauncher
# IBM LSF on Summit supercomputer
lsf_provider = LSFProvider(
queue='batch',
account='project123',
nodes_per_block=2,
walltime='01:00:00',
launcher=JsrunLauncher()
)
# PBS Pro cluster
pbs_provider = PBSProProvider(
queue='normal',
account='allocation123',
nodes_per_block=4,
cores_per_node=28,
walltime='02:00:00',
launcher=MpiRunLauncher()
)

Use multiple providers for different types of workloads:
from parsl.config import Config
from parsl.executors import HighThroughputExecutor
config = Config(executors=[
# Local development and testing
HighThroughputExecutor(
label='local_dev',
max_workers=4,
provider=LocalProvider(max_blocks=1)
),
# HPC cluster for compute-intensive tasks
HighThroughputExecutor(
label='hpc_cluster',
max_workers=100,
provider=SlurmProvider(
partition='compute',
nodes_per_block=4,
max_blocks=25,
walltime='04:00:00'
)
),
# Cloud bursting for overflow capacity
HighThroughputExecutor(
label='cloud_burst',
max_workers=50,
provider=AWSProvider(
instance_type='c5.xlarge',
max_blocks=10,
spot_max_bid=0.20
)
)
])

Configure automatic resource scaling based on workload:
# Aggressive scaling for burst workloads
burst_provider = SlurmProvider(
partition='burst',
init_blocks=0, # Start with no resources
min_blocks=0, # Scale down to zero when idle
max_blocks=50, # Scale up to 50 blocks
parallelism=0.8, # Submit 80% of pending tasks as blocks
walltime='00:30:00' # Short walltime for responsiveness
)
# Conservative scaling for long-running workflows
steady_provider = SlurmProvider(
partition='normal',
init_blocks=2, # Always maintain 2 blocks
min_blocks=2, # Never scale below 2 blocks
max_blocks=10, # Conservative maximum
parallelism=0.3, # Conservative submission rate
walltime='12:00:00' # Long walltime for efficiency
)

Handle provider-specific errors and resource failures:
from parsl.providers.errors import ProviderException, ScaleOutFailed
try:
parsl.load(config)
except ProviderException as e:
print(f"Provider initialization failed: {e}")
except ScaleOutFailed as e:
print(f"Resource scaling failed: {e}")
# Monitor provider status
for executor in parsl.dfk().executors.values():
provider = executor.provider
print(f"Provider {provider.label}: {provider.resources}")Install with Tessl CLI
npx tessl i tessl/pypi-parsl