# AWS S3 Integration

Complete AWS S3 support with advanced features including multipart uploads, transfer acceleration, custom endpoints, and S3-specific metadata access. This implementation provides full compatibility with AWS S3 and S3-compatible services.

## Capabilities

### S3Path Class

S3-specific path implementation with access to S3 metadata and operations.

```python { .api }
class S3Path(CloudPath):
    """AWS S3 path implementation."""

    @property
    def bucket(self) -> str:
        """
        S3 bucket name.

        Returns:
            Bucket name from the S3 URI
        """

    @property
    def key(self) -> str:
        """
        S3 object key (path within bucket).

        Returns:
            Object key string
        """

    @property
    def etag(self) -> str:
        """
        S3 object ETag identifier.

        Returns:
            ETag string for the object
        """
```

### S3Client Class

S3 client for authentication and service configuration.

```python { .api }
class S3Client:
    """AWS S3 client with comprehensive configuration options."""

    def __init__(
        self,
        aws_access_key_id: str = None,
        aws_secret_access_key: str = None,
        aws_session_token: str = None,
        no_sign_request: bool = False,
        botocore_session = None,
        profile_name: str = None,
        boto3_session = None,
        file_cache_mode: FileCacheMode = None,
        local_cache_dir: str = None,
        endpoint_url: str = None,
        boto3_transfer_config = None,
        content_type_method = None,
        extra_args: dict = None
    ):
        """
        Initialize S3 client.

        Args:
            aws_access_key_id: AWS access key ID
            aws_secret_access_key: AWS secret access key
            aws_session_token: AWS session token for temporary credentials
            no_sign_request: Make unsigned requests (for public buckets)
            botocore_session: Custom botocore session
            profile_name: AWS profile name from credentials file
            boto3_session: Custom boto3 session
            file_cache_mode: Cache management strategy
            local_cache_dir: Local directory for file cache
            endpoint_url: Custom S3 endpoint URL
            boto3_transfer_config: Transfer configuration for multipart uploads
            content_type_method: Function to determine MIME types
            extra_args: Additional arguments for S3 operations
        """
```

## Usage Examples

### Basic S3 Operations

```python
from cloudpathlib import S3Path, S3Client

# Create S3 path (uses default client)
s3_path = S3Path("s3://my-bucket/data/file.txt")

# Access S3-specific properties
print(f"Bucket: {s3_path.bucket}")  # "my-bucket"
print(f"Key: {s3_path.key}")        # "data/file.txt"

# Check if object exists
if s3_path.exists():
    print(f"ETag: {s3_path.etag}")
```
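
Directory-style listing works the same way as with `pathlib` paths; a short sketch (the bucket and prefix names are illustrative):

```python
# Iterate over objects under a prefix
data_dir = S3Path("s3://my-bucket/data/")
for child in data_dir.iterdir():
    print(child)

# Glob for specific file types under the prefix
for csv_file in data_dir.glob("*.csv"):
    print(csv_file.name)
```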

### S3 Client Configuration

```python
# Configure S3 client with credentials
client = S3Client(
    aws_access_key_id="your-access-key",
    aws_secret_access_key="your-secret-key"
)

# Set as default client
client.set_as_default_client()

# Use with paths
s3_path = S3Path("s3://my-bucket/file.txt")  # Uses configured client
```
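
The `file_cache_mode` and `local_cache_dir` parameters from the `S3Client` signature are not shown elsewhere in this section; a minimal sketch, assuming `FileCacheMode` is importable from `cloudpathlib.enums`:

```python
from cloudpathlib import S3Client
from cloudpathlib.enums import FileCacheMode  # assumed import path

# Keep downloaded copies in a persistent local cache directory
cached_client = S3Client(
    file_cache_mode=FileCacheMode.persistent,
    local_cache_dir="/tmp/cloudpathlib-cache",
)
cached_client.set_as_default_client()
```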

### AWS Profile Authentication

```python
# Use AWS profile from ~/.aws/credentials
client = S3Client(profile_name="my-profile")
client.set_as_default_client()

# Create paths using profile
s3_path = S3Path("s3://my-bucket/data.json")
content = s3_path.read_text()
```

### Session Token Authentication

```python
# Use temporary credentials with session token
client = S3Client(
    aws_access_key_id="temp-access-key",
    aws_secret_access_key="temp-secret-key",
    aws_session_token="session-token"
)

# Work with temporary credentials
s3_path = S3Path("s3://secure-bucket/confidential.txt", client=client)
```

### Public Bucket Access

```python
# Access public S3 buckets without credentials
client = S3Client(no_sign_request=True)

# Work with public data
public_path = S3Path("s3://public-bucket/open-data.csv", client=client)
data = public_path.read_text()
```

### Custom S3 Endpoints

```python
# Use S3-compatible services (MinIO, Ceph, etc.)
client = S3Client(
    endpoint_url="https://s3.my-company.com",
    aws_access_key_id="minio-access-key",
    aws_secret_access_key="minio-secret-key"
)

# Work with custom endpoint
s3_path = S3Path("s3://internal-bucket/file.txt", client=client)
```

### Multipart Upload Configuration

```python
from boto3.s3.transfer import TransferConfig

# Configure transfer settings for large files
transfer_config = TransferConfig(
    multipart_threshold=1024 * 1024 * 25,  # 25 MB
    max_concurrency=10,
    multipart_chunksize=1024 * 1024 * 25,
    use_threads=True
)

client = S3Client(boto3_transfer_config=transfer_config)

# Upload large file with optimized settings
large_file = S3Path("s3://my-bucket/large-file.zip", client=client)
large_file.upload_from("local-large-file.zip")
```

### S3 Storage Classes

```python
# Upload with a specific storage class
client = S3Client(extra_args={"StorageClass": "GLACIER"})

# Upload file to Glacier
s3_path = S3Path("s3://archive-bucket/archive.tar", client=client)
s3_path.upload_from("data.tar")

# Clients for different storage classes
storage_classes = {
    "standard": S3Client(extra_args={"StorageClass": "STANDARD"}),
    "ia": S3Client(extra_args={"StorageClass": "STANDARD_IA"}),
    "glacier": S3Client(extra_args={"StorageClass": "GLACIER"}),
    "deep_archive": S3Client(extra_args={"StorageClass": "DEEP_ARCHIVE"})
}

# Use the appropriate storage class
file_path = S3Path("s3://my-bucket/backup.zip", client=storage_classes["glacier"])
```

### Server-Side Encryption

```python
# Configure server-side encryption
client = S3Client(extra_args={
    "ServerSideEncryption": "AES256"
})

# Upload encrypted file
encrypted_path = S3Path("s3://secure-bucket/encrypted.txt", client=client)
encrypted_path.write_text("Sensitive data")

# Use KMS encryption
kms_client = S3Client(extra_args={
    "ServerSideEncryption": "aws:kms",
    "SSEKMSKeyId": "your-kms-key-id"
})
```

### Metadata and Tags

```python
# Upload with metadata and tags
client = S3Client(extra_args={
    "Metadata": {
        "Author": "Data Team",
        "Project": "Analytics",
        "Version": "1.0"
    },
    "Tagging": "Environment=Production&Department=Analytics"
})

s3_path = S3Path("s3://my-bucket/report.pdf", client=client)
s3_path.upload_from("monthly-report.pdf")
```
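
Related to object metadata, the `content_type_method` parameter in the `S3Client` signature controls how the Content-Type is determined on upload; a minimal sketch, assuming it follows the `mimetypes.guess_type` convention of returning a `(type, encoding)` tuple (the helper below is hypothetical):

```python
import mimetypes

# Hypothetical helper: fall back to a generic type when guessing fails
def guess_with_fallback(path):
    content_type, encoding = mimetypes.guess_type(path)
    return (content_type or "application/octet-stream", encoding)

typed_client = S3Client(content_type_method=guess_with_fallback)
```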

### Presigned URLs

```python
# Generate presigned URLs for S3
s3_path = S3Path("s3://private-bucket/document.pdf")

# Download URL (valid for 1 hour)
download_url = s3_path.as_url(presign=True, expire_seconds=3600)
print(f"Download: {download_url}")

# Share with a longer expiration
share_url = s3_path.as_url(presign=True, expire_seconds=86400)  # 24 hours
print(f"Share URL: {share_url}")
```
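
For comparison, calling `as_url()` without `presign` should return the plain, unsigned HTTPS object URL, which is only readable if the object itself is publicly accessible (an assumption about the default behavior, not shown in the signature above):

```python
# Unsigned HTTPS URL (readable only for public objects)
public_url = s3_path.as_url()
print(f"Public URL: {public_url}")
```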

### S3 Select Operations

```python
# Note: S3 Select requires direct boto3 usage.
# This is an example of extending S3Path for advanced operations.

class ExtendedS3Path(S3Path):
    def select_object_content(self, expression, input_serialization, output_serialization):
        """Perform an S3 Select query on the object."""
        # S3Path.client is the cloudpathlib S3Client; its underlying boto3 S3
        # client is assumed to be exposed as the `.client` attribute (may vary
        # by cloudpathlib version).
        response = self.client.client.select_object_content(
            Bucket=self.bucket,
            Key=self.key,
            Expression=expression,
            ExpressionType='SQL',
            InputSerialization=input_serialization,
            OutputSerialization=output_serialization
        )

        # Process the streaming response
        for event in response['Payload']:
            if 'Records' in event:
                yield event['Records']['Payload'].decode('utf-8')

# Usage
csv_path = ExtendedS3Path("s3://data-bucket/large-dataset.csv")
query = "SELECT * FROM S3Object s WHERE s.category = 'important'"

for chunk in csv_path.select_object_content(
    expression=query,
    input_serialization={'CSV': {'FileHeaderInfo': 'USE'}},
    output_serialization={'CSV': {}}
):
    process_chunk(chunk)  # process_chunk: your own handler for each chunk
```

### Batch Operations

```python
# Upload multiple files efficiently
import concurrent.futures
from pathlib import Path

def upload_file(local_path, s3_base):
    s3_path = s3_base / local_path.name
    s3_path.upload_from(local_path)
    return s3_path

# Parallel uploads
local_files = list(Path("data/").glob("*.csv"))
s3_base = S3Path("s3://my-bucket/csv-data/")

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [executor.submit(upload_file, f, s3_base) for f in local_files]

    for future in concurrent.futures.as_completed(futures):
        s3_path = future.result()
        print(f"Uploaded: {s3_path}")
```
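
The same pattern works in the other direction with `download_to`; a mirrored sketch for pulling the objects uploaded above back down in parallel (the local directory name is illustrative):

```python
# Parallel downloads of the uploaded CSV files
local_dir = Path("downloads/")
local_dir.mkdir(exist_ok=True)

def download_file(s3_path, target_dir):
    local_path = target_dir / s3_path.name
    s3_path.download_to(local_path)
    return local_path

with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
    futures = [
        executor.submit(download_file, p, local_dir)
        for p in s3_base.glob("*.csv")
    ]
    for future in concurrent.futures.as_completed(futures):
        print(f"Downloaded: {future.result()}")
```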

### Lifecycle Management

```python
from datetime import datetime, timedelta

# Work with different lifecycle stages
def get_storage_class_client(storage_class):
    return S3Client(extra_args={"StorageClass": storage_class})

# Archive old files
cutoff_date = datetime.now() - timedelta(days=365)
archive_client = get_storage_class_client("GLACIER")

for s3_file in S3Path("s3://my-bucket/logs/").rglob("*.log"):
    if s3_file.stat().st_mtime < cutoff_date.timestamp():
        # Copy using the Glacier-configured client
        archive_path = S3Path(str(s3_file), client=archive_client)
        s3_file.copy(archive_path)
        print(f"Archived: {s3_file}")
```

### Cross-Region Operations

```python
import boto3

# Work with buckets in different regions by passing region-scoped boto3 sessions
us_east_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="us-east-1",
    )
)

eu_west_client = S3Client(
    boto3_session=boto3.Session(
        aws_access_key_id="key",
        aws_secret_access_key="secret",
        region_name="eu-west-1",
    )
)

# Copy between regions
source = S3Path("s3://us-bucket/data.txt", client=us_east_client)
destination = S3Path("s3://eu-bucket/data.txt", client=eu_west_client)

source.copy(destination)
```

### Error Handling

```python
from cloudpathlib.exceptions import (
    CloudPathFileNotFoundError,
    MissingCredentialsError,
    InvalidPrefixError,
)
import botocore.exceptions

try:
    s3_path = S3Path("s3://nonexistent-bucket/file.txt")
    content = s3_path.read_text()
except CloudPathFileNotFoundError:
    print("S3 object not found")
except botocore.exceptions.NoCredentialsError:
    print("AWS credentials not configured")
except botocore.exceptions.BotoCoreError as e:
    print(f"AWS error: {e}")
```