# AWS S3 Integration

Complete AWS S3 support with advanced features, including multipart uploads, transfer acceleration, custom endpoints, and access to S3-specific metadata. The implementation works with AWS S3 as well as S3-compatible services such as MinIO and Ceph.

## Capabilities

### S3Path Class

S3-specific path implementation with access to S3 metadata and operations.

```python { .api }
class S3Path(CloudPath):
    """AWS S3 path implementation."""

    @property
    def bucket(self) -> str:
        """
        S3 bucket name.

        Returns:
            Bucket name from the S3 URI
        """

    @property
    def key(self) -> str:
        """
        S3 object key (path within bucket).

        Returns:
            Object key string
        """

    @property
    def etag(self) -> str:
        """
        S3 object ETag identifier.

        Returns:
            ETag string for the object
        """
```

### S3Client Class

S3 client for authentication and service configuration.

```python { .api }
class S3Client:
    """AWS S3 client with comprehensive configuration options."""

    def __init__(
        self,
        aws_access_key_id: Optional[str] = None,
        aws_secret_access_key: Optional[str] = None,
        aws_session_token: Optional[str] = None,
        no_sign_request: bool = False,
        botocore_session=None,
        profile_name: Optional[str] = None,
        boto3_session=None,
        file_cache_mode: Optional[FileCacheMode] = None,
        local_cache_dir: Optional[str] = None,
        endpoint_url: Optional[str] = None,
        boto3_transfer_config=None,
        content_type_method=None,
        extra_args: Optional[dict] = None,
    ):
        """
        Initialize S3 client.

        Args:
            aws_access_key_id: AWS access key ID
            aws_secret_access_key: AWS secret access key
            aws_session_token: AWS session token for temporary credentials
            no_sign_request: Make unsigned requests (for public buckets)
            botocore_session: Custom botocore session
            profile_name: AWS profile name from credentials file
            boto3_session: Custom boto3 session
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            endpoint_url: Custom S3 endpoint URL
            boto3_transfer_config: Transfer configuration for multipart uploads
            content_type_method: Function to determine MIME types
            extra_args: Additional arguments for S3 operations
        """
```

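The cache-related parameters control where downloaded copies of cloud files are stored and when they are cleaned up. Below is a minimal sketch of a persistent local cache; it assumes `FileCacheMode` is importable from `cloudpathlib.enums` (check your installed version), and the cache directory path is only an illustration:

```python
from cloudpathlib import S3Client
from cloudpathlib.enums import FileCacheMode  # assumption: enum location may vary by version

# Keep downloaded copies in a fixed directory so repeated reads reuse the cache
client = S3Client(
    local_cache_dir="/tmp/s3-cache",           # illustrative cache location
    file_cache_mode=FileCacheMode.persistent,  # keep cached files between runs
)
client.set_as_default_client()
```
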
## Usage Examples

### Basic S3 Operations

```python
from cloudpathlib import S3Path, S3Client

# Create S3 path (uses default client)
s3_path = S3Path("s3://my-bucket/data/file.txt")

# Access S3-specific properties
print(f"Bucket: {s3_path.bucket}")  # "my-bucket"
print(f"Key: {s3_path.key}")        # "data/file.txt"

# Check if object exists
if s3_path.exists():
    print(f"ETag: {s3_path.etag}")
```

### S3 Client Configuration

```python
# Configure S3 client with credentials
client = S3Client(
    aws_access_key_id="your-access-key",
    aws_secret_access_key="your-secret-key",
)

# Set as default client
client.set_as_default_client()

# Use with paths
s3_path = S3Path("s3://my-bucket/file.txt")  # Uses configured client
```

### AWS Profile Authentication

```python
# Use AWS profile from ~/.aws/credentials
client = S3Client(profile_name="my-profile")
client.set_as_default_client()

# Create paths using profile
s3_path = S3Path("s3://my-bucket/data.json")
content = s3_path.read_text()
```

### Session Token Authentication

```python
# Use temporary credentials with session token
client = S3Client(
    aws_access_key_id="temp-access-key",
    aws_secret_access_key="temp-secret-key",
    aws_session_token="session-token",
)

# Work with temporary credentials
s3_path = S3Path("s3://secure-bucket/confidential.txt", client=client)
```

### Public Bucket Access

```python
# Access public S3 buckets without credentials
client = S3Client(no_sign_request=True)

# Work with public data
public_path = S3Path("s3://public-bucket/open-data.csv", client=client)
data = public_path.read_text()
```

### Custom S3 Endpoints

```python
# Use S3-compatible services (MinIO, Ceph, etc.)
client = S3Client(
    endpoint_url="https://s3.my-company.com",
    aws_access_key_id="minio-access-key",
    aws_secret_access_key="minio-secret-key",
)

# Work with custom endpoint
s3_path = S3Path("s3://internal-bucket/file.txt", client=client)
```

### Multipart Upload Configuration

```python
from boto3.s3.transfer import TransferConfig

# Configure transfer settings for large files
transfer_config = TransferConfig(
    multipart_threshold=25 * 1024 * 1024,  # 25 MB
    max_concurrency=10,
    multipart_chunksize=25 * 1024 * 1024,  # 25 MB parts
    use_threads=True,
)

client = S3Client(boto3_transfer_config=transfer_config)

# Upload large file with optimized settings
large_file = S3Path("s3://my-bucket/large-file.zip", client=client)
large_file.upload_from("local-large-file.zip")
```

### S3 Storage Classes

```python
# Upload with a specific storage class
client = S3Client(extra_args={"StorageClass": "GLACIER"})

# Upload file to Glacier
s3_path = S3Path("s3://archive-bucket/archive.tar", client=client)
s3_path.upload_from("data.tar")

# Clients configured for different storage classes
storage_classes = {
    "standard": S3Client(extra_args={"StorageClass": "STANDARD"}),
    "ia": S3Client(extra_args={"StorageClass": "STANDARD_IA"}),
    "glacier": S3Client(extra_args={"StorageClass": "GLACIER"}),
    "deep_archive": S3Client(extra_args={"StorageClass": "DEEP_ARCHIVE"}),
}

# Use the appropriate storage class
file_path = S3Path("s3://my-bucket/backup.zip", client=storage_classes["glacier"])
```

### Server-Side Encryption

```python
# Configure server-side encryption
client = S3Client(extra_args={
    "ServerSideEncryption": "AES256"
})

# Upload encrypted file
encrypted_path = S3Path("s3://secure-bucket/encrypted.txt", client=client)
encrypted_path.write_text("Sensitive data")

# Use KMS encryption
kms_client = S3Client(extra_args={
    "ServerSideEncryption": "aws:kms",
    "SSEKMSKeyId": "your-kms-key-id"
})
```

### Metadata and Tags

```python
# Upload with metadata and tags
client = S3Client(extra_args={
    "Metadata": {
        "Author": "Data Team",
        "Project": "Analytics",
        "Version": "1.0"
    },
    "Tagging": "Environment=Production&Department=Analytics"
})

s3_path = S3Path("s3://my-bucket/report.pdf", client=client)
s3_path.upload_from("monthly-report.pdf")
```

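To confirm what was stored, one option is to drop down to the underlying boto3 client. The sketch below assumes the `S3Client` instance exposes that client as `.client` (check your cloudpathlib version):

```python
# Inspect object metadata via the underlying boto3 client
# (assumption: S3Client exposes it as `.client`).
boto3_s3 = s3_path.client.client
head = boto3_s3.head_object(Bucket=s3_path.bucket, Key=s3_path.key)
print(head["Metadata"])     # user-defined metadata (S3 lowercases the keys)
print(head["ContentType"])  # MIME type assigned at upload
```
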
### Presigned URLs

```python
# Generate presigned URLs for S3
s3_path = S3Path("s3://private-bucket/document.pdf")

# Download URL (valid for 1 hour)
download_url = s3_path.as_url(presign=True, expire_seconds=3600)
print(f"Download: {download_url}")

# Share with a longer expiration
share_url = s3_path.as_url(presign=True, expire_seconds=86400)  # 24 hours
print(f"Share URL: {share_url}")
```

### S3 Select Operations

```python
# Note: S3 Select requires direct boto3 usage.
# This is an example of extending S3Path for advanced operations; it assumes
# the S3Client exposes its underlying boto3 client as `.client`.

class ExtendedS3Path(S3Path):
    def select_object_content(self, expression, input_serialization, output_serialization):
        """Perform an S3 Select query on this object."""
        boto3_s3 = self.client.client  # underlying boto3 S3 client
        response = boto3_s3.select_object_content(
            Bucket=self.bucket,
            Key=self.key,
            Expression=expression,
            ExpressionType='SQL',
            InputSerialization=input_serialization,
            OutputSerialization=output_serialization
        )

        # Process streaming response
        for event in response['Payload']:
            if 'Records' in event:
                yield event['Records']['Payload'].decode('utf-8')

# Usage
csv_path = ExtendedS3Path("s3://data-bucket/large-dataset.csv")
query = "SELECT * FROM S3Object s WHERE s.category = 'important'"

for chunk in csv_path.select_object_content(
    expression=query,
    input_serialization={'CSV': {'FileHeaderInfo': 'USE'}},
    output_serialization={'CSV': {}}
):
    print(chunk)  # replace with your own processing
```

### Batch Operations

```python
# Upload multiple files efficiently
import concurrent.futures
from pathlib import Path

def upload_file(local_path, s3_base):
    s3_path = s3_base / local_path.name
    s3_path.upload_from(local_path)
    return s3_path

# Parallel uploads
local_files = list(Path("data/").glob("*.csv"))
s3_base = S3Path("s3://my-bucket/csv-data/")

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(upload_file, f, s3_base) for f in local_files]

    for future in concurrent.futures.as_completed(futures):
        s3_path = future.result()
        print(f"Uploaded: {s3_path}")
```

### Lifecycle Management

```python
from datetime import datetime, timedelta

# Work with different lifecycle stages
def get_storage_class_client(storage_class):
    return S3Client(extra_args={"StorageClass": storage_class})

# Archive old files
cutoff_date = datetime.now() - timedelta(days=365)
archive_client = get_storage_class_client("GLACIER")

for s3_file in S3Path("s3://my-bucket/logs/").rglob("*.log"):
    if s3_file.stat().st_mtime < cutoff_date.timestamp():
        # Copy in place under the archive client so its StorageClass applies
        archive_path = S3Path(str(s3_file), client=archive_client)
        s3_file.copy(archive_path, force_overwrite_to_cloud=True)
        print(f"Archived: {s3_file}")
```

### Cross-Region Operations

```python
import boto3

# Work with buckets in different regions by passing region-specific boto3 sessions
us_east_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="us-east-1",
    )
)

eu_west_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="eu-west-1",
    )
)

# Copy between regions
source = S3Path("s3://us-bucket/data.txt", client=us_east_client)
destination = S3Path("s3://eu-bucket/data.txt", client=eu_west_client)

source.copy(destination)
```

### Error Handling

```python
# Specific exception classes live in cloudpathlib.exceptions (for example
# MissingCredentialsError and InvalidPrefixError); CloudPathException is the
# common base class for cloudpathlib-specific errors.
from cloudpathlib.exceptions import CloudPathException
import botocore.exceptions

try:
    s3_path = S3Path("s3://nonexistent-bucket/file.txt")
    content = s3_path.read_text()
except botocore.exceptions.NoCredentialsError:
    print("AWS credentials not configured")
except botocore.exceptions.ClientError as e:
    # Missing buckets/keys, access denied, and other S3 API errors
    print(f"S3 error: {e}")
except CloudPathException as e:
    print(f"cloudpathlib error: {e}")
```