# AWS S3 Integration

Complete AWS S3 support with advanced features, including multipart uploads, transfer acceleration, custom endpoints, and access to S3-specific metadata. The implementation works with AWS S3 as well as S3-compatible services such as MinIO and Ceph.

## Capabilities

### S3Path Class

S3-specific path implementation with access to S3 metadata and operations.

```python { .api }
class S3Path(CloudPath):
    """AWS S3 path implementation."""

    @property
    def bucket(self) -> str:
        """
        S3 bucket name.

        Returns:
            Bucket name from the S3 URI
        """

    @property
    def key(self) -> str:
        """
        S3 object key (path within bucket).

        Returns:
            Object key string
        """

    @property
    def etag(self) -> str:
        """
        S3 object ETag identifier.

        Returns:
            ETag string for the object
        """
```

### S3Client Class

S3 client for authentication and service configuration.

```python { .api }
class S3Client:
    """AWS S3 client with comprehensive configuration options."""

    def __init__(
        self,
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_session_token: Optional[str] = None,
        no_sign_request: bool = False,
        botocore_session=None,
        profile_name: Optional[str] = None,
        boto3_session=None,
        file_cache_mode: Optional[FileCacheMode] = None,
        local_cache_dir: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        boto3_transfer_config=None,
        content_type_method=None,
        extra_args: Optional[dict] = None,
    ):
        """
        Initialize S3 client.

        Args:
            aws_access_key_id: AWS access key ID
            aws_secret_access_key: AWS secret access key
            aws_session_token: AWS session token for temporary credentials
            no_sign_request: Make unsigned requests (for public buckets)
            botocore_session: Custom botocore session
            profile_name: AWS profile name from credentials file
            boto3_session: Custom boto3 session
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            endpoint_url: Custom S3 endpoint URL
            boto3_transfer_config: Transfer configuration for multipart uploads
            content_type_method: Function to determine MIME types
            extra_args: Additional arguments for S3 operations
        """
```

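The cache-related parameters control where downloaded copies of cloud files are stored and when they are cleaned up. Below is a minimal sketch of a persistent local cache; it assumes `FileCacheMode` is importable from `cloudpathlib.enums` (check your installed version), and the cache directory path is only an illustration:

```python
from cloudpathlib import S3Client
from cloudpathlib.enums import FileCacheMode  # assumption: enum location may vary by version

# Keep downloaded copies in a fixed directory so repeated reads reuse the cache
client = S3Client(
    local_cache_dir="/tmp/s3-cache",           # illustrative cache location
    file_cache_mode=FileCacheMode.persistent,  # keep cached files between runs
)
client.set_as_default_client()
```
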
## Usage Examples

### Basic S3 Operations

```python
from cloudpathlib import S3Path, S3Client

# Create S3 path (uses default client)
s3_path = S3Path("s3://my-bucket/data/file.txt")

# Access S3-specific properties
print(f"Bucket: {s3_path.bucket}")  # "my-bucket"
print(f"Key: {s3_path.key}")        # "data/file.txt"

# Check if object exists
if s3_path.exists():
    print(f"ETag: {s3_path.etag}")
```

### S3 Client Configuration

```python
# Configure S3 client with credentials
client = S3Client(
    aws_access_key_id="your-access-key",
    aws_secret_access_key="your-secret-key",
)

# Set as default client
client.set_as_default_client()

# Use with paths
s3_path = S3Path("s3://my-bucket/file.txt")  # Uses configured client
```

### AWS Profile Authentication

```python
# Use AWS profile from ~/.aws/credentials
client = S3Client(profile_name="my-profile")
client.set_as_default_client()

# Create paths using profile
s3_path = S3Path("s3://my-bucket/data.json")
content = s3_path.read_text()
```

### Session Token Authentication

```python
# Use temporary credentials with session token
client = S3Client(
    aws_access_key_id="temp-access-key",
    aws_secret_access_key="temp-secret-key",
    aws_session_token="session-token",
)

# Work with temporary credentials
s3_path = S3Path("s3://secure-bucket/confidential.txt", client=client)
```

### Public Bucket Access

```python
# Access public S3 buckets without credentials
client = S3Client(no_sign_request=True)

# Work with public data
public_path = S3Path("s3://public-bucket/open-data.csv", client=client)
data = public_path.read_text()
```

### Custom S3 Endpoints

```python
# Use S3-compatible services (MinIO, Ceph, etc.)
client = S3Client(
    endpoint_url="https://s3.my-company.com",
    aws_access_key_id="minio-access-key",
    aws_secret_access_key="minio-secret-key",
)

# Work with custom endpoint
s3_path = S3Path("s3://internal-bucket/file.txt", client=client)
```

### Multipart Upload Configuration

```python
from boto3.s3.transfer import TransferConfig

# Configure transfer settings for large files
transfer_config = TransferConfig(
    multipart_threshold=25 * 1024 * 1024,  # 25 MB
    max_concurrency=10,
    multipart_chunksize=25 * 1024 * 1024,  # 25 MB parts
    use_threads=True,
)

client = S3Client(boto3_transfer_config=transfer_config)

# Upload large file with optimized settings
large_file = S3Path("s3://my-bucket/large-file.zip", client=client)
large_file.upload_from("local-large-file.zip")
```

### S3 Storage Classes

```python
# Upload with a specific storage class
client = S3Client(extra_args={"StorageClass": "GLACIER"})

# Upload file to Glacier
s3_path = S3Path("s3://archive-bucket/archive.tar", client=client)
s3_path.upload_from("data.tar")

# Clients configured for different storage classes
storage_classes = {
    "standard": S3Client(extra_args={"StorageClass": "STANDARD"}),
    "ia": S3Client(extra_args={"StorageClass": "STANDARD_IA"}),
    "glacier": S3Client(extra_args={"StorageClass": "GLACIER"}),
    "deep_archive": S3Client(extra_args={"StorageClass": "DEEP_ARCHIVE"}),
}

# Use the appropriate storage class
file_path = S3Path("s3://my-bucket/backup.zip", client=storage_classes["glacier"])
```

### Server-Side Encryption

```python
# Configure server-side encryption
client = S3Client(extra_args={
    "ServerSideEncryption": "AES256"
})

# Upload encrypted file
encrypted_path = S3Path("s3://secure-bucket/encrypted.txt", client=client)
encrypted_path.write_text("Sensitive data")

# Use KMS encryption
kms_client = S3Client(extra_args={
    "ServerSideEncryption": "aws:kms",
    "SSEKMSKeyId": "your-kms-key-id"
})
```

### Metadata and Tags

```python
# Upload with metadata and tags
client = S3Client(extra_args={
    "Metadata": {
        "Author": "Data Team",
        "Project": "Analytics",
        "Version": "1.0"
    },
    "Tagging": "Environment=Production&Department=Analytics"
})

s3_path = S3Path("s3://my-bucket/report.pdf", client=client)
s3_path.upload_from("monthly-report.pdf")
```

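To confirm what was stored, one option is to drop down to the underlying boto3 client. The sketch below assumes the `S3Client` instance exposes that client as `.client` (check your cloudpathlib version):

```python
# Inspect object metadata via the underlying boto3 client
# (assumption: S3Client exposes it as `.client`).
boto3_s3 = s3_path.client.client
head = boto3_s3.head_object(Bucket=s3_path.bucket, Key=s3_path.key)
print(head["Metadata"])     # user-defined metadata (S3 lowercases the keys)
print(head["ContentType"])  # MIME type assigned at upload
```
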
### Presigned URLs

```python
# Generate presigned URLs for S3
s3_path = S3Path("s3://private-bucket/document.pdf")

# Download URL (valid for 1 hour)
download_url = s3_path.as_url(presign=True, expire_seconds=3600)
print(f"Download: {download_url}")

# Share with a longer expiration
share_url = s3_path.as_url(presign=True, expire_seconds=86400)  # 24 hours
print(f"Share URL: {share_url}")
```

### S3 Select Operations

```python
# Note: S3 Select requires direct boto3 usage.
# This is an example of extending S3Path for advanced operations; it assumes
# the S3Client exposes its underlying boto3 client as `.client`.

class ExtendedS3Path(S3Path):
    def select_object_content(self, expression, input_serialization, output_serialization):
        """Perform an S3 Select query on this object."""
        boto3_s3 = self.client.client  # underlying boto3 S3 client
        response = boto3_s3.select_object_content(
            Bucket=self.bucket,
            Key=self.key,
            Expression=expression,
            ExpressionType='SQL',
            InputSerialization=input_serialization,
            OutputSerialization=output_serialization
        )

        # Process streaming response
        for event in response['Payload']:
            if 'Records' in event:
                yield event['Records']['Payload'].decode('utf-8')

# Usage
csv_path = ExtendedS3Path("s3://data-bucket/large-dataset.csv")
query = "SELECT * FROM S3Object s WHERE s.category = 'important'"

for chunk in csv_path.select_object_content(
    expression=query,
    input_serialization={'CSV': {'FileHeaderInfo': 'USE'}},
    output_serialization={'CSV': {}}
):
    print(chunk)  # replace with your own processing
```

### Batch Operations

```python
# Upload multiple files efficiently
import concurrent.futures
from pathlib import Path

def upload_file(local_path, s3_base):
    s3_path = s3_base / local_path.name
    s3_path.upload_from(local_path)
    return s3_path

# Parallel uploads
local_files = list(Path("data/").glob("*.csv"))
s3_base = S3Path("s3://my-bucket/csv-data/")

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(upload_file, f, s3_base) for f in local_files]

    for future in concurrent.futures.as_completed(futures):
        s3_path = future.result()
        print(f"Uploaded: {s3_path}")
```

### Lifecycle Management

```python
from datetime import datetime, timedelta

# Work with different lifecycle stages
def get_storage_class_client(storage_class):
    return S3Client(extra_args={"StorageClass": storage_class})

# Archive old files
cutoff_date = datetime.now() - timedelta(days=365)
archive_client = get_storage_class_client("GLACIER")

for s3_file in S3Path("s3://my-bucket/logs/").rglob("*.log"):
    if s3_file.stat().st_mtime < cutoff_date.timestamp():
        # Copy in place under the archive client so its StorageClass applies
        archive_path = S3Path(str(s3_file), client=archive_client)
        s3_file.copy(archive_path, force_overwrite_to_cloud=True)
        print(f"Archived: {s3_file}")
```

### Cross-Region Operations

```python
import boto3

# Work with buckets in different regions by passing region-specific boto3 sessions
us_east_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="us-east-1",
    )
)

eu_west_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="eu-west-1",
    )
)

# Copy between regions
source = S3Path("s3://us-bucket/data.txt", client=us_east_client)
destination = S3Path("s3://eu-bucket/data.txt", client=eu_west_client)

source.copy(destination)
```

### Error Handling

```python
# Specific exception classes live in cloudpathlib.exceptions (for example
# MissingCredentialsError and InvalidPrefixError); CloudPathException is the
# common base class for cloudpathlib-specific errors.
from cloudpathlib.exceptions import CloudPathException
import botocore.exceptions

try:
    s3_path = S3Path("s3://nonexistent-bucket/file.txt")
    content = s3_path.read_text()
except botocore.exceptions.NoCredentialsError:
    print("AWS credentials not configured")
except botocore.exceptions.ClientError as e:
    # Missing buckets/keys, access denied, and other S3 API errors
    print(f"S3 error: {e}")
except CloudPathException as e:
    print(f"cloudpathlib error: {e}")
```