or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

configuration.mdcore-data-structures.mddata-io.mdindex.mdquery-indexing.mdsingle-cell-biology.mdspatial-data.md

configuration.mddocs/

# Configuration and Options

Configuration classes for TileDB context management and platform-specific options for creating and writing SOMA objects. These classes provide fine-grained control over TileDB storage engine behavior and performance characteristics.

## Package Import

```python
from tiledbsoma.options import (
    SOMATileDBContext, TileDBCreateOptions, TileDBWriteOptions, ConfigDict
)
```

## Capabilities

### SOMATileDBContext

TileDB context configuration for SOMA operations. The context manages TileDB-specific settings including storage backends, memory limits, threading, and other platform configurations.

```python { .api }
class SOMATileDBContext:
    def __init__(self, config=None):
        """
        Initialize TileDB context for SOMA operations.

        Parameters:
        - config: dict or ConfigDict, TileDB configuration parameters
        """

    @property
    def config(self):
        """
        Get current TileDB configuration.

        Returns:
        ConfigDict: Current configuration settings
        """

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit."""
        pass
```

#### Usage Example

```python
import tiledbsoma
from tiledbsoma.options import SOMATileDBContext

# Basic context usage
with SOMATileDBContext() as ctx:
    with tiledbsoma.open("experiment.soma", context=ctx) as exp:
        # Operations use the specified context
        data = exp.obs.read().concat()

# Context with custom configuration
config = {
    "sm.memory_budget": 2**30,                   # 1GB memory budget
    "sm.tile_cache_size": 2**28,                 # 256MB tile cache
    "vfs.s3.max_parallel_ops": 16,               # S3 parallel operations
    "vfs.s3.multipart_part_size": 50*1024*1024   # 50MB parts
}

with SOMATileDBContext(config=config) as ctx:
    # Create objects with custom settings
    with tiledbsoma.DataFrame.create(
        "high_perf_dataframe.soma",
        schema=schema,
        context=ctx
    ) as df:
        df.write(data)
```

### ConfigDict

Configuration dictionary for TileDB context with structured parameter management.

```python { .api }
class ConfigDict:
    def __init__(self, config_dict=None):
        """
        Initialize configuration dictionary.

        Parameters:
        - config_dict: dict, initial configuration parameters
        """

    def __getitem__(self, key):
        """Get configuration parameter."""

    def __setitem__(self, key, value):
        """Set configuration parameter."""

    def update(self, other):
        """Update configuration with another dict or ConfigDict."""

    def items(self):
        """Iterate over configuration items."""
```

#### Usage Example

```python
from tiledbsoma.options import ConfigDict, SOMATileDBContext

# Create configuration
config = ConfigDict({
    "sm.memory_budget": 1024**3,        # 1GB
    "sm.tile_cache_size": 512*1024**2,  # 512MB
})

# Update configuration
config.update({
    "vfs.num_threads": 8,
    "sm.compute_concurrency_level": 4
})

# Use with context
with SOMATileDBContext(config=config) as ctx:
    # Context uses the configuration
    pass
```

### TileDBCreateOptions

Platform-specific options for creating TileDB objects. These options control storage layout, compression, encryption, and other creation-time parameters.

```python { .api }
class TileDBCreateOptions:
    def __init__(self, **kwargs):
        """
        Initialize TileDB creation options.

        Parameters:
        - **kwargs: TileDB-specific creation parameters

        Common Parameters:
        - tile_order: str, tile order ("row-major" or "col-major")
        - cell_order: str, cell order ("row-major" or "col-major")
        - capacity: int, tile capacity
        - sparse: bool, whether array is sparse
        - allows_duplicates: bool, whether duplicates are allowed
        - offsets_compression: str, compression for offsets
        - offsets_compression_level: int, compression level for offsets
        - validity_compression: str, compression for validity data
        - validity_compression_level: int, compression level for validity
        """

    def __getitem__(self, key):
        """Get creation option."""

    def __setitem__(self, key, value):
        """Set creation option."""

    def get(self, key, default=None):
        """Get creation option with default."""
```

#### Usage Example

```python
import tiledbsoma
from tiledbsoma.options import TileDBCreateOptions
import pyarrow as pa

# Create options for high-performance sparse array
create_opts = TileDBCreateOptions(
    capacity=100000,              # Large tile capacity
    tile_order="row-major",
    cell_order="row-major",
    allows_duplicates=False,
    # Compression settings
    offsets_compression="lz4",
    offsets_compression_level=1,
    validity_compression="rle"    # Run-length encoding
)

# Use options when creating arrays
with tiledbsoma.SparseNDArray.create(
    "optimized_matrix.soma",
    type=pa.float32(),
    shape=(10000, 5000),
    platform_config=create_opts
) as array:
    # Array created with optimized settings
    pass

# Create options for dataframes
df_create_opts = TileDBCreateOptions(
    capacity=10000,
    tile_order="row-major",
    offsets_compression="zstd",
    offsets_compression_level=3
)

with tiledbsoma.DataFrame.create(
    "optimized_dataframe.soma",
    schema=schema,
    platform_config=df_create_opts
) as df:
    df.write(data)
```

### TileDBWriteOptions

Platform-specific options for writing to TileDB objects. These options control write behavior, memory usage, and performance characteristics during data ingestion.

```python { .api }
class TileDBWriteOptions:
    def __init__(self, **kwargs):
        """
        Initialize TileDB write options.

        Parameters:
        - **kwargs: TileDB-specific write parameters

        Common Parameters:
        - batch_size: int, number of elements per write batch
        - memory_budget: int, memory budget for writes in bytes
        - check_coord_dups: bool, check for coordinate duplicates
        - check_coord_oob: bool, check for out-of-bounds coordinates
        - dedup_coords: bool, deduplicate coordinates
        """

    def __getitem__(self, key):
        """Get write option."""

    def __setitem__(self, key, value):
        """Set write option."""

    def get(self, key, default=None):
        """Get write option with default."""
```

#### Usage Example

```python
import tiledbsoma
from tiledbsoma.options import TileDBWriteOptions
import numpy as np
import pyarrow as pa

# Write options for large bulk loads
write_opts = TileDBWriteOptions(
    batch_size=50000,          # Large batches
    memory_budget=2**30,       # 1GB memory budget
    check_coord_dups=False,    # Skip duplicate check for performance
    check_coord_oob=False,     # Skip bounds check for performance
    dedup_coords=False         # Skip deduplication
)

# Use write options for bulk data loading
with tiledbsoma.open("large_matrix.soma", mode="w") as array:
    # Generate large dataset
    n_nonzero = 1000000
    cell_ids = np.random.randint(0, 50000, n_nonzero)
    gene_ids = np.random.randint(0, 20000, n_nonzero)
    values = np.random.exponential(2.0, n_nonzero)

    coords = pa.table({
        "soma_dim_0": cell_ids,
        "soma_dim_1": gene_ids
    })
    data = pa.table({
        "soma_data": values
    })

    # Write with optimized settings
    array.write((coords, data), platform_config=write_opts)

# Write options for safety-first approach
safe_write_opts = TileDBWriteOptions(
    check_coord_dups=True,
    check_coord_oob=True,
    dedup_coords=True,
    memory_budget=512*1024**2  # 512MB
)

with tiledbsoma.open("safe_dataframe.soma", mode="w") as df:
    df.write(data, platform_config=safe_write_opts)
```

### Common Configuration Patterns

#### Cloud Storage Configuration

```python
import tiledbsoma
from tiledbsoma.options import SOMATileDBContext, ConfigDict

# S3 configuration
s3_config = ConfigDict({
    # S3 settings
    "vfs.s3.aws_access_key_id": "your_access_key",
    "vfs.s3.aws_secret_access_key": "your_secret_key",
    "vfs.s3.region": "us-west-2",
    "vfs.s3.max_parallel_ops": 16,
    "vfs.s3.multipart_part_size": 100*1024*1024,  # 100MB
    "vfs.s3.use_virtual_addressing": "true",

    # Performance settings
    "sm.memory_budget": 4*1024**3,   # 4GB
    "sm.tile_cache_size": 1024**3,   # 1GB
    "vfs.num_threads": 16
})

with SOMATileDBContext(config=s3_config) as ctx:
    # Work with S3-stored data
    with tiledbsoma.open("s3://my-bucket/experiment.soma", context=ctx) as exp:
        data = exp.obs.read().concat()

# Azure Blob Storage configuration
azure_config = ConfigDict({
    "vfs.azure.storage_account_name": "myaccount",
    "vfs.azure.storage_account_key": "mykey",
    "vfs.azure.max_parallel_ops": 16,
    "sm.memory_budget": 2*1024**3,
    "vfs.num_threads": 8
})
```

#### High-Performance Local Storage

```python
from tiledbsoma.options import SOMATileDBContext, ConfigDict, TileDBCreateOptions, TileDBWriteOptions

# High-performance local configuration
local_config = ConfigDict({
    "sm.memory_budget": 8*1024**3,     # 8GB memory
    "sm.tile_cache_size": 2*1024**3,   # 2GB cache
    "sm.compute_concurrency_level": 8,
    "vfs.num_threads": 16,
    "sm.io_concurrency_level": 4
})

# Optimized creation options
create_opts = TileDBCreateOptions(
    capacity=100000,
    tile_order="row-major",
    offsets_compression="lz4",
    offsets_compression_level=1
)

# Optimized write options
write_opts = TileDBWriteOptions(
    batch_size=100000,
    memory_budget=4*1024**3,
    check_coord_dups=False
)

# Combined usage
with SOMATileDBContext(config=local_config) as ctx:
    with tiledbsoma.SparseNDArray.create(
        "fast_array.soma",
        type=pa.float32(),
        shape=(100000, 50000),
        context=ctx,
        platform_config=create_opts
    ) as array:
        # Fast bulk loading
        array.write(data, platform_config=write_opts)
```

#### Memory-Constrained Configuration

```python
# Configuration for memory-limited environments
low_memory_config = ConfigDict({
    "sm.memory_budget": 256*1024**2,   # 256MB
    "sm.tile_cache_size": 64*1024**2,  # 64MB
    "vfs.num_threads": 2,
    "sm.compute_concurrency_level": 1
})

conservative_write_opts = TileDBWriteOptions(
    batch_size=1000,
    memory_budget=128*1024**2,  # 128MB
    check_coord_dups=True,
    check_coord_oob=True
)

with SOMATileDBContext(config=low_memory_config) as ctx:
    # Memory-efficient operations
    with tiledbsoma.open("data.soma", context=ctx) as obj:
        # Process in small batches
        for batch in obj.read(batch_size=1000):
            # Process batch
            pass
```

### Context Management Best Practices

```python
import tiledbsoma
from tiledbsoma.options import SOMATileDBContext

# Context sharing across operations
config = {"sm.memory_budget": 2*1024**3}

with SOMATileDBContext(config=config) as ctx:
    # Create experiment with shared context
    with tiledbsoma.Experiment.create("exp.soma", context=ctx) as exp:
        # All operations share the same context
        exp.add_new_dataframe("obs", schema=obs_schema)

        with exp.obs as obs_df:
            obs_df.write(obs_data)

    # Read operations with same context
    with tiledbsoma.open("exp.soma", context=ctx) as exp:
        data = exp.obs.read().concat()
```

This configuration system provides comprehensive control over TileDB-SOMA performance and behavior, enabling optimization for different use cases from high-throughput cloud deployments to memory-constrained local analysis.