or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.md · core-source.md · file-formats.md · index.md · stream-operations.md · utilities.md · zip-support.md

docs/configuration.md

0

# Configuration Management

1

2

Configuration classes and specifications for both the V4 and legacy V3 formats. They cover S3 authentication, bucket configuration, and file format specifications, with full validation and schema generation.

3

4

## Capabilities

5

6

### V4 Configuration Class

7

8

Primary configuration class for the V4 S3 connector with comprehensive validation and schema generation.

9

10

```python { .api }

11

class Config(AbstractFileBasedSpec):

12

"""

13

Configuration specification for S3 connector V4.

14

Inherits from AbstractFileBasedSpec for file-based connector compatibility.

15

"""

16

17

bucket: str

18

"""S3 bucket name to sync data from"""

19

20

aws_access_key_id: Optional[str]

21

"""AWS access key ID for authentication (marked as secret)"""

22

23

aws_secret_access_key: Optional[str]

24

"""AWS secret access key for authentication (marked as secret)"""

25

26

role_arn: Optional[str]

27

"""AWS IAM role ARN for assume role authentication"""

28

29

endpoint: Optional[str]

30

"""S3-compatible endpoint URL for non-AWS services"""

31

32

region_name: Optional[str]

33

"""AWS region name where the bucket is located"""

34

35

delivery_method: Union[DeliverRecords, DeliverRawFiles]

36

"""Delivery method configuration for how data should be processed"""

37

38

@classmethod

39

def documentation_url(cls) -> AnyUrl:

40

"""

41

Returns the documentation URL for the connector.

42

43

Returns:

44

URL pointing to the connector documentation

45

"""

46

47

@root_validator

48

def validate_optional_args(cls, values):

49

"""

50

Validates configuration fields and their relationships.

51

Uses Pydantic root_validator decorator for comprehensive validation.

52

53

Args:

54

values: Configuration values to validate

55

56

Returns:

57

Validated configuration values

58

"""

59

60

@classmethod

61

def schema(cls, *args, **kwargs) -> Dict[str, Any]:

62

"""

63

Generates the configuration schema for the connector.

64

65

Returns:

66

Dictionary representing the JSON schema for configuration

67

"""

68

```

69

70

### Legacy Configuration Transformer

71

72

Handles transformation from legacy V3 configurations to V4 format for backward compatibility.

73

74

```python { .api }

75

class LegacyConfigTransformer:

76

"""

77

Transforms legacy V3 configurations to V4 format.

78

Ensures backward compatibility for existing connector deployments.

79

"""

80

81

@classmethod

82

def convert(cls, legacy_config: SourceS3Spec) -> Mapping[str, Any]:

83

"""

84

Converts legacy V3 configuration to V4 format.

85

86

Args:

87

legacy_config: V3 configuration specification

88

89

Returns:

90

V4 format configuration dictionary

91

"""

92

93

@classmethod

94

def _create_globs(cls, path_pattern: str) -> List[str]:

95

"""

96

Creates glob patterns from V3 path patterns.

97

98

Args:

99

path_pattern: V3 path pattern string

100

101

Returns:

102

List of glob patterns for V4 format

103

"""

104

105

@classmethod

106

def _transform_seconds_to_micros(cls, datetime_str: str) -> str:

107

"""

108

Transforms datetime formats from seconds to microseconds precision.

109

110

Args:

111

datetime_str: Datetime string in V3 format

112

113

Returns:

114

Datetime string in V4 format

115

"""

116

117

@classmethod

118

def _transform_file_format(cls, format_options) -> Mapping[str, Any]:

119

"""

120

Transforms file format configurations from V3 to V4.

121

122

Args:

123

format_options: V3 format options

124

125

Returns:

126

V4 format configuration

127

"""

128

129

@classmethod

130

def parse_config_options_str(cls, options_field: str, options_value: Optional[str]) -> Dict[str, Any]:

131

"""

132

Parses JSON configuration strings from V3 format.

133

134

Args:

135

options_field: Name of the options field

136

options_value: JSON string value or None

137

138

Returns:

139

Parsed configuration dictionary

140

"""

141

142

@staticmethod

143

def _filter_legacy_noops(advanced_options: Dict[str, Any]):

144

"""

145

Filters out legacy no-operation options that are no longer needed.

146

147

Args:

148

advanced_options: Dictionary of advanced configuration options

149

"""

150

```

151

152

### Legacy V3 Specification (SourceS3Spec)

153

154

Legacy V3 configuration specification for backward compatibility, still actively supported by the connector.

155

156

```python { .api }

157

class SourceS3Spec(SourceFilesAbstractSpec, BaseModel):

158

"""

159

Legacy V3 configuration specification for backward compatibility.

160

Contains nested S3Provider class for provider-specific settings.

161

"""

162

163

class S3Provider(BaseModel):

164

"""Provider-specific configuration fields for S3 access"""

165

bucket: str

166

"""S3 bucket name"""

167

168

aws_access_key_id: Optional[str]

169

"""AWS access key ID for authentication (marked as secret)"""

170

171

aws_secret_access_key: Optional[str]

172

"""AWS secret access key for authentication (marked as secret)"""

173

174

role_arn: Optional[str]

175

"""AWS IAM role ARN for assume role authentication"""

176

177

path_prefix: str

178

"""S3 key prefix to filter files"""

179

180

endpoint: str

181

"""S3-compatible endpoint URL for non-AWS services"""

182

183

region_name: Optional[str]

184

"""AWS region name where the bucket is located"""

185

186

start_date: Optional[str]

187

"""Start date for incremental sync (ISO format)"""

188

189

provider: S3Provider

190

"""S3 provider configuration"""

191

192

class SourceFilesAbstractSpec(BaseModel):

193

"""

194

Abstract specification for file-based sources.

195

Provides common configuration fields and schema processing methods.

196

"""

197

198

dataset: str

199

"""Output stream name (pattern: ^([A-Za-z0-9-_]+)$)"""

200

201

path_pattern: str

202

"""File pattern regex for replication"""

203

204

format: Union[CsvFormat, ParquetFormat, AvroFormat, JsonlFormat]

205

"""File format specification"""

206

207

user_schema: str

208

"""Manual schema enforcement (alias: "schema")"""

209

210

@staticmethod

211

def change_format_to_oneOf(schema: dict) -> dict:

212

"""

213

Transforms schema format specifications to oneOf structure.

214

215

Args:

216

schema: JSON schema dictionary

217

218

Returns:

219

Transformed schema with oneOf format specifications

220

"""

221

222

@staticmethod

223

def remove_enum_allOf(schema: dict) -> dict:

224

"""

225

Removes unsupported allOf structures from enum definitions.

226

227

Args:

228

schema: JSON schema dictionary

229

230

Returns:

231

Schema with allOf structures removed

232

"""

233

234

@staticmethod

235

def check_provider_added(schema: dict) -> None:

236

"""

237

Validates that provider property is properly added to schema.

238

239

Args:

240

schema: JSON schema dictionary

241

242

Raises:

243

ValidationError: If provider property is missing or invalid

244

"""

245

246

@staticmethod

247

def resolve_refs(schema: dict) -> dict:

248

"""

249

Resolves JSON schema references within the schema.

250

251

Args:

252

schema: JSON schema dictionary with references

253

254

Returns:

255

Schema with resolved references

256

"""

257

258

@classmethod

259

def schema(cls, *args, **kwargs) -> Dict[str, Any]:

260

"""

261

Generates schema with post-processing transformations.

262

263

Returns:

264

Processed JSON schema dictionary

265

"""

266

```

267

268

## Usage Examples

269

270

### Basic V4 Configuration

271

272

```python

273

from source_s3.v4 import Config

274

275

# Create configuration with AWS credentials

276

config = Config(

277

bucket="my-data-bucket",

278

aws_access_key_id="AKIAIOSFODNN7EXAMPLE",

279

aws_secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",

280

region_name="us-east-1",

281

delivery_method=DeliverRecords()

282

)

283

```

284

285

### IAM Role Configuration

286

287

```python

288

from source_s3.v4 import Config

289

290

# Configure with IAM role assumption

291

config = Config(

292

bucket="secure-bucket",

293

role_arn="arn:aws:iam::123456789012:role/S3AccessRole",

294

region_name="us-west-2",

295

delivery_method=DeliverRawFiles()

296

)

297

```

298

299

### S3-Compatible Service Configuration

300

301

```python

302

from source_s3.v4 import Config

303

304

# Configure for MinIO or other S3-compatible service

305

config = Config(

306

bucket="minio-bucket",

307

endpoint="https://minio.example.com",

308

aws_access_key_id="minioadmin",

309

aws_secret_access_key="minioadmin",

310

delivery_method=DeliverRecords()

311

)

312

```

313

314

### Legacy Configuration Transformation

315

316

```python

317

from source_s3.v4 import LegacyConfigTransformer

318

from source_s3.source import SourceS3Spec

319

320

# Transform V3 config to V4

321

legacy_spec = SourceS3Spec(...) # V3 configuration

322

v4_config = LegacyConfigTransformer.convert(legacy_spec)

323

```

324

325

### Schema Generation

326

327

```python

328

from source_s3.v4 import Config

329

330

# Generate configuration schema

331

schema = Config.schema()

332

print(schema) # JSON schema for the configuration

333

```