Low-level, data-driven core of boto 3 providing foundational AWS service access.
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Automatic pagination for AWS operations that return large result sets, with built-in iterator support and result aggregation capabilities. The pagination system handles AWS service operations that return partial results with continuation tokens, providing seamless access to complete datasets.
Main class for creating paginated requests with automatic token handling.
class Paginator:
def __init__(self, method: callable, pagination_config: dict, model: OperationModel):
"""
Initialize paginator for an AWS operation.
Args:
method: Client method to paginate
pagination_config: Pagination configuration dictionary
model: Operation model for parameter validation
"""
@property
def result_keys(self) -> List[JMESPathExpression]:
"""JMESPath expressions for result keys in paginated responses."""
def paginate(self, **kwargs) -> PageIterator:
"""
Create page iterator for paginated results.
Args:
**kwargs: Operation parameters plus optional PaginationConfig
Returns:
PageIterator: Iterator over result pages
"""
Iterator class for traversing paginated results with built-in aggregation support.
class PageIterator:
def __init__(
self,
method: callable,
input_token: List[str],
output_token: List[JMESPathExpression],
more_results: JMESPathExpression,
result_keys: List[JMESPathExpression],
non_aggregate_keys: List[JMESPathExpression],
limit_key: str,
max_items: int,
starting_token: str,
page_size: int,
op_kwargs: dict
):
"""
Initialize page iterator with pagination configuration.
Args:
method: Client method to call
input_token: Parameter names for pagination tokens
output_token: Response paths for next tokens
more_results: Expression to check if more results exist
result_keys: Expressions for extracting result data
non_aggregate_keys: Keys that should not be aggregated
limit_key: Parameter name for page size limit
max_items: Maximum total items to return
starting_token: Token to resume pagination
page_size: Number of items per page
op_kwargs: Operation parameters
"""
def __iter__(self) -> Iterator[dict]:
"""
Iterate through result pages.
Yields:
dict: Each page of results from the AWS API
"""
def build_full_result(self) -> dict:
"""
Aggregate all paginated results into a single response.
Returns:
dict: Complete aggregated results with continuation token if truncated
"""
def search(self, expression: str) -> Iterator:
"""
Apply JMESPath expression across all pages.
Args:
expression: JMESPath expression to apply to each page
Yields:
Results matching the JMESPath expression from each page
"""
@property
def result_keys(self) -> List[JMESPathExpression]:
"""JMESPath expressions for extracting result data."""
@property
def resume_token(self) -> str:
"""Token to resume pagination from current position."""
@property
def non_aggregate_part(self) -> dict:
"""Response data that should not be aggregated across pages."""
Model class for pagination configuration management.
class PaginatorModel:
def __init__(self, paginator_config: dict):
"""
Initialize paginator model with configuration.
Args:
paginator_config: Paginator configuration dictionary
"""
def get_paginator(self, operation_name: str) -> dict:
"""
Get pagination configuration for operation.
Args:
operation_name: AWS operation name
Returns:
dict: Pagination configuration
Raises:
ValueError: If operation does not support pagination
"""
Paginate through S3 bucket objects:
from botocore.session import get_session
# Create session and client
session = get_session()
s3_client = session.create_client('s3', region_name='us-east-1')
# Get paginator for list_objects_v2 operation
paginator = s3_client.get_paginator('list_objects_v2')
# Paginate through all objects
for page in paginator.paginate(Bucket='my-bucket'):
if 'Contents' in page:
for obj in page['Contents']:
print(f"Object: {obj['Key']}, Size: {obj['Size']}")
Control pagination behavior with PaginationConfig:
from botocore.session import get_session
session = get_session()
ec2_client = session.create_client('ec2', region_name='us-west-2')
# Get paginator with custom configuration
paginator = ec2_client.get_paginator('describe_instances')
# Configure pagination limits
page_iterator = paginator.paginate(
PaginationConfig={
'MaxItems': 100, # Maximum total items to return
'PageSize': 20, # Items per API call
'StartingToken': None # Token to resume pagination
}
)
# Process each page
for page in page_iterator:
for reservation in page.get('Reservations', []):
for instance in reservation.get('Instances', []):
print(f"Instance: {instance['InstanceId']}")
# Check if results were truncated
result = page_iterator.build_full_result()
if 'NextToken' in result:
print(f"Results truncated. Resume with token: {result['NextToken']}")
Aggregate all paginated results into a single response:
from botocore.session import get_session
session = get_session()
iam_client = session.create_client('iam', region_name='us-east-1')
# Get paginator and build complete result
paginator = iam_client.get_paginator('list_users')
page_iterator = paginator.paginate()
# Aggregate all results
complete_result = page_iterator.build_full_result()
# Access aggregated data
all_users = complete_result.get('Users', [])
print(f"Total users found: {len(all_users)}")
for user in all_users:
print(f"User: {user['UserName']}, Created: {user['CreateDate']}")
Use JMESPath expressions to filter results across pages:
from botocore.session import get_session
session = get_session()
rds_client = session.create_client('rds', region_name='us-east-1')
# Get paginator for DB instances
paginator = rds_client.get_paginator('describe_db_instances')
page_iterator = paginator.paginate()
# Search for running instances across all pages
running_instances = page_iterator.search(
'DBInstances[?DBInstanceStatus==`available`].DBInstanceIdentifier'
)
print("Available DB instances:")
for instance_id in running_instances:
print(f" - {instance_id}")
Resume pagination from a previous position:
from botocore.session import get_session
session = get_session()
logs_client = session.create_client('logs', region_name='us-east-1')
# Initial pagination with limit
paginator = logs_client.get_paginator('describe_log_groups')
page_iterator = paginator.paginate(
PaginationConfig={
'MaxItems': 50,
'PageSize': 10
}
)
# Process partial results
result = page_iterator.build_full_result()
log_groups = result.get('LogGroups', [])
print(f"Retrieved {len(log_groups)} log groups")
# Resume if more results available
if 'NextToken' in result:
resume_token = result['NextToken']
# Continue pagination from where we left off
page_iterator = paginator.paginate(
PaginationConfig={
'StartingToken': resume_token,
'MaxItems': 50,
'PageSize': 10
}
)
# Process remaining results
remaining_result = page_iterator.build_full_result()
remaining_groups = remaining_result.get('LogGroups', [])
print(f"Retrieved {len(remaining_groups)} additional log groups")
Verify if an operation supports pagination:
from botocore.session import get_session
session = get_session()
lambda_client = session.create_client('lambda', region_name='us-east-1')
# Check if operation can be paginated
if lambda_client.can_paginate('list_functions'):
paginator = lambda_client.get_paginator('list_functions')
for page in paginator.paginate():
functions = page.get('Functions', [])
for func in functions:
print(f"Function: {func['FunctionName']}")
else:
print("list_functions does not support pagination")
Process results with custom logic during iteration:
from botocore.session import get_session
session = get_session()
s3_client = session.create_client('s3', region_name='us-east-1')
paginator = s3_client.get_paginator('list_objects_v2')
total_size = 0
object_count = 0
for page in paginator.paginate(Bucket='my-bucket'):
contents = page.get('Contents', [])
for obj in contents:
total_size += obj['Size']
object_count += 1
# Process large objects differently
if obj['Size'] > 100 * 1024 * 1024: # 100MB
print(f"Large object: {obj['Key']} ({obj['Size']} bytes)")
print(f"Total: {object_count} objects, {total_size} bytes")
Handle operations with multiple result arrays:
from botocore.session import get_session
session = get_session()
ec2_client = session.create_client('ec2', region_name='us-west-2')
paginator = ec2_client.get_paginator('describe_instances')
page_iterator = paginator.paginate()
# Build full result aggregates all result keys
complete_result = page_iterator.build_full_result()
# Access aggregated reservations
reservations = complete_result.get('Reservations', [])
print(f"Total reservations: {len(reservations)}")
# Process all instances across reservations
all_instances = []
for reservation in reservations:
all_instances.extend(reservation.get('Instances', []))
print(f"Total instances: {len(all_instances)}")
Handle pagination-specific errors:
from botocore.session import get_session
from botocore.exceptions import PaginationError, ClientError
session = get_session()
dynamodb_client = session.create_client('dynamodb', region_name='us-east-1')
try:
paginator = dynamodb_client.get_paginator('scan')
# This might fail if PageSize is not supported
page_iterator = paginator.paginate(
TableName='my-table',
PaginationConfig={
'PageSize': 100, # Not all operations support PageSize
'MaxItems': 1000
}
)
result = page_iterator.build_full_result()
items = result.get('Items', [])
print(f"Scanned {len(items)} items")
except PaginationError as e:
print(f"Pagination error: {e}")
# Retry without PageSize
page_iterator = paginator.paginate(
TableName='my-table',
PaginationConfig={'MaxItems': 1000}
)
result = page_iterator.build_full_result()
items = result.get('Items', [])
print(f"Scanned {len(items)} items (without PageSize)")
except ClientError as e:
error_code = e.response['Error']['Code']
if error_code == 'ResourceNotFoundException':
print("Table not found")
else:
print(f"AWS error: {error_code}")
Handle invalid starting tokens:
from botocore.session import get_session
session = get_session()
s3_client = session.create_client('s3', region_name='us-east-1')
# Simulate invalid token
invalid_token = "invalid_token_string"
try:
paginator = s3_client.get_paginator('list_objects_v2')
page_iterator = paginator.paginate(
Bucket='my-bucket',
PaginationConfig={'StartingToken': invalid_token}
)
# This will fail during iteration
for page in page_iterator:
print("This won't be reached")
except ValueError as e:
if "Bad starting token" in str(e):
print("Invalid starting token provided")
# Restart pagination without token
page_iterator = paginator.paginate(Bucket='my-bucket')
for page in page_iterator:
contents = page.get('Contents', [])
print(f"Page has {len(contents)} objects")
break # Process first page only
Access paginators through client methods:
from botocore.session import get_session
session = get_session()
client = session.create_client('s3', region_name='us-east-1')
# Check if operation supports pagination
operation_name = 'list_objects_v2'
if client.can_paginate(operation_name):
# Get the paginator
paginator = client.get_paginator(operation_name)
# Use paginator
page_iterator = paginator.paginate(Bucket='my-bucket')
for page in page_iterator:
print(f"Page contains {len(page.get('Contents', []))} objects")
else:
print(f"{operation_name} does not support pagination")
Pagination works with service models for parameter validation:
from botocore.session import get_session
session = get_session()
cloudformation_client = session.create_client('cloudformation', region_name='us-east-1')
# Get service model information
service_model = cloudformation_client.meta.service_model
operation_model = service_model.operation_model('list_stacks')
print(f"Operation: {operation_model.name}")
print(f"Input shape: {operation_model.input_shape}")
# Use paginator with the operation
paginator = cloudformation_client.get_paginator('list_stacks')
# Pagination respects operation model validation
try:
page_iterator = paginator.paginate(
StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE']
)
stacks = []
for page in page_iterator:
stacks.extend(page.get('StackSummaries', []))
print(f"Found {len(stacks)} stacks")
except Exception as e:
print(f"Error: {e}")
Install with Tessl CLI
npx tessl i tessl/pypi-botocore