# Chunk Store Operations

Configurable chunked storage for large datasets, with custom serialization strategies and date-based chunking. Supports append/update operations, audit trails, and flexible data-organization patterns for datasets that don't fit in memory.
## Capabilities

### ChunkStore Class

Chunked storage system for large datasets with configurable chunking strategies and serialization options.

```python { .api }
class ChunkStore:
    """
    Chunked storage for large datasets with configurable organization.

    Provides flexible data chunking strategies, custom serialization,
    and efficient append/update operations for datasets that exceed
    memory capacity or require specific organization patterns.
    """
```
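
A `ChunkStore` is normally obtained through an Arctic connection rather than constructed directly, by initializing a library with the `CHUNK_STORE` type. A minimal sketch, assuming a MongoDB instance on the default local port (the library name `example.chunks` is illustrative):

```python
from arctic import Arctic, CHUNK_STORE

# Initialize a chunk-store-backed library and retrieve it
store = Arctic('mongodb://localhost:27017')
store.initialize_library('example.chunks', lib_type=CHUNK_STORE)
chunk_lib = store['example.chunks']  # a ChunkStore instance
```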
### Symbol Management

Operations for managing chunked data symbols including listing, existence checking, and renaming.

```python { .api }
def list_symbols(self, partial_match=None):
    """
    List available symbols with optional pattern matching.

    Parameters:
    - partial_match: Partial string to match symbol names

    Returns:
    List of symbol names in the chunk store
    """

def has_symbol(self, symbol):
    """
    Check if symbol exists in chunk store.

    Parameters:
    - symbol: Symbol name to check

    Returns:
    bool: True if symbol exists
    """

def rename(self, from_symbol, to_symbol, audit=None):
    """
    Rename symbol in chunk store.

    Parameters:
    - from_symbol: Current symbol name
    - to_symbol: New symbol name
    - audit: Optional audit metadata for the operation

    Raises:
    - NoDataFoundException: If source symbol doesn't exist
    - ArcticException: If target symbol already exists
    """

def delete(self, symbol, chunk_range=None, audit=None):
    """
    Delete symbol or specific chunk range.

    Parameters:
    - symbol: Symbol name to delete
    - chunk_range: Specific chunk range to delete (default: all chunks)
    - audit: Optional audit metadata for the operation

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """
```
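
A short sketch combining these operations: because `rename` raises when the target symbol already exists, a defensive promotion checks both names first (the symbol names here are illustrative):

```python
# Promote a staging symbol to its final name only when it is safe;
# rename() raises if 'prices' already exists
if chunk_lib.has_symbol('prices_staging') and not chunk_lib.has_symbol('prices'):
    chunk_lib.rename('prices_staging', 'prices',
                     audit={'operation': 'promote', 'user': 'etl'})
```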
### Read Operations

Methods for retrieving chunked data with range filtering and chunk iteration.

```python { .api }
def read(self, symbol, chunk_range=None, filter_data=True, **kwargs):
    """
    Read chunked data with optional filtering and range selection.

    Parameters:
    - symbol: Symbol name to read
    - chunk_range: Specific chunk range to read (default: all chunks)
    - filter_data: Apply data filtering during read (default: True)
    - **kwargs: Additional read parameters for chunker/serializer

    Returns:
    Reconstructed data object (type depends on serializer)

    Raises:
    - NoDataFoundException: If symbol or chunk range doesn't exist
    """

def read_metadata(self, symbol):
    """
    Read symbol metadata without loading chunk data.

    Parameters:
    - symbol: Symbol name

    Returns:
    dict: Symbol metadata including chunk information

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

def read_audit_log(self, symbol=None):
    """
    Read audit log for chunk operations.

    Parameters:
    - symbol: Filter by specific symbol (default: all symbols)

    Returns:
    List of audit log entries with operation details
    """
```
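
The `filter_data` flag matters when a requested range does not align with chunk boundaries: with filtering enabled the result is trimmed to the range, while disabling it skips the trim and returns the intersecting chunks as stored, which can be faster but may include out-of-range rows. A sketch of the difference, assuming the daily-chunked `large_dataset` symbol from the usage examples below:

```python
from datetime import datetime
from arctic.date import DateRange

rng = DateRange(datetime(2020, 1, 10), datetime(2020, 1, 12))

# Trimmed exactly to the requested range (default behavior)
trimmed = chunk_lib.read('large_dataset', chunk_range=rng)

# Whole intersecting chunks; may contain rows outside the range
raw = chunk_lib.read('large_dataset', chunk_range=rng, filter_data=False)
assert len(raw) >= len(trimmed)
```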
### Write Operations

Methods for storing chunked data with configurable chunking strategies and metadata support.

```python { .api }
def write(self, symbol, item, metadata=None, chunker=None, audit=None, **kwargs):
    """
    Write data using specified chunking strategy.

    Parameters:
    - symbol: Symbol name to write
    - item: Data to store (DataFrame, array, or custom object)
    - metadata: Optional metadata dictionary
    - chunker: Chunking strategy object (default: DateChunker)
    - audit: Optional audit metadata for the operation
    - **kwargs: Additional parameters for chunker/serializer

    Raises:
    - QuotaExceededException: If write would exceed storage quota
    - ArcticException: If chunking or serialization fails
    """

def append(self, symbol, item, upsert=False, metadata=None, audit=None, **kwargs):
    """
    Append data to an existing symbol, or create it if it doesn't exist.

    Parameters:
    - symbol: Symbol name
    - item: Data to append
    - upsert: Create the symbol if it doesn't exist (default: False)
    - metadata: Optional metadata dictionary
    - audit: Optional audit metadata for the operation
    - **kwargs: Additional parameters for append operation

    Returns:
    Operation result information

    Raises:
    - NoDataFoundException: If symbol doesn't exist and upsert=False
    - OverlappingDataException: If appended data overlaps existing chunks
    """

def update(self, symbol, item, metadata=None, chunk_range=None,
           upsert=False, audit=None, **kwargs):
    """
    Update data in specific chunk range.

    Parameters:
    - symbol: Symbol name
    - item: Data to update with
    - metadata: Optional metadata dictionary
    - chunk_range: Specific chunks to update (default: auto-detect)
    - upsert: Create the symbol if it doesn't exist (default: False)
    - audit: Optional audit metadata for the operation
    - **kwargs: Additional parameters for update operation

    Returns:
    Operation result information

    Raises:
    - NoDataFoundException: If symbol doesn't exist and upsert=False
    """

def write_metadata(self, symbol, metadata):
    """
    Write metadata for symbol without changing data.

    Parameters:
    - symbol: Symbol name
    - metadata: Metadata dictionary to write

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """
```
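
Because `append` raises `NoDataFoundException` for a missing symbol unless `upsert=True`, a create-or-append pattern keeps incremental loads simple: the first run creates the symbol and later runs extend it. A minimal sketch (`daily_bars` is an illustrative symbol name):

```python
import numpy as np
import pandas as pd

new_rows = pd.DataFrame({'value': np.random.randn(3)},
                        index=pd.date_range('2021-01-01', periods=3, freq='D'))

# First run creates the symbol; subsequent runs append to it
chunk_lib.append('daily_bars', new_rows, upsert=True,
                 audit={'operation': 'daily_load'})
```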
### Chunk Management

Methods for managing and iterating over data chunks with range queries.

```python { .api }
def get_chunk_ranges(self, symbol, chunk_range=None, reverse=False):
    """
    Get chunk ranges for symbol.

    Parameters:
    - symbol: Symbol name
    - chunk_range: Filter to specific range (default: all chunks)
    - reverse: Return ranges in reverse order (default: False)

    Returns:
    Generator of (start, end) chunk boundary tuples

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

def iterator(self, symbol, chunk_range=None, **kwargs):
    """
    Create iterator over symbol chunks.

    Parameters:
    - symbol: Symbol name
    - chunk_range: Iterate over specific range (default: all chunks)
    - **kwargs: Additional iterator parameters

    Returns:
    Generator yielding the data for each chunk in ascending order

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

def reverse_iterator(self, symbol, chunk_range=None, **kwargs):
    """
    Create reverse iterator over symbol chunks.

    Parameters:
    - symbol: Symbol name
    - chunk_range: Iterate over specific range (default: all chunks)
    - **kwargs: Additional iterator parameters

    Returns:
    Generator yielding the data for each chunk in descending order

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """
```
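
A common use of `reverse_iterator` is inspecting the most recent data without reading the whole symbol: pulling a single item from the generator loads only the newest chunk. A sketch, assuming a DataFrame-backed symbol:

```python
# The first item from the reverse iterator is the newest chunk's data
latest_chunk = next(chunk_lib.reverse_iterator('large_dataset'))
print(latest_chunk.tail())
```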
### Information and Statistics

Methods for retrieving detailed information about symbols and storage statistics.

```python { .api }
def get_info(self, symbol):
    """
    Get detailed information about symbol's chunks and storage.

    Parameters:
    - symbol: Symbol name

    Returns:
    dict: Comprehensive information including chunk counts, sizes, ranges

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

def stats(self):
    """
    Get chunk store statistics and performance metrics.

    Returns:
    dict: Statistics including symbol counts, storage usage, chunk distribution
    """
```
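
Combined with `list_symbols`, these calls support a quick per-symbol storage report. A sketch that assumes `get_info` exposes `chunk_count` and `len` fields, as arctic's ChunkStore does:

```python
# Summarize every symbol in the store
for sym in chunk_lib.list_symbols():
    info = chunk_lib.get_info(sym)
    print(f"{sym}: {info.get('chunk_count')} chunks, {info.get('len')} rows")
```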
## Chunking Strategies

### DateChunker

Default chunking strategy that organizes data by date ranges.

```python { .api }
class DateChunker:
    """
    Date-based chunking strategy for time series data.

    Automatically partitions data based on date boundaries,
    enabling efficient date range queries and updates.
    """
    TYPE = 'date'
```
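
Chunk granularity is chosen at write time via the `chunk_size` keyword, which the store forwards to the chunker; arctic's DateChunker accepts 'D', 'M', and 'Y' (daily, monthly, yearly). A sketch reusing the `large_data` frame from the usage examples below:

```python
from arctic.chunkstore.date_chunker import DateChunker

# Monthly chunks: fewer, larger chunks suit coarse range scans
chunk_lib.write('monthly_series', large_data,
                chunker=DateChunker(), chunk_size='M')
```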
### PassthroughChunker

Simple chunking strategy that stores data as single chunks.

```python { .api }
class PassthroughChunker:
    """
    Pass-through chunking strategy with no automatic partitioning.

    Stores data as provided without automatic chunking,
    suitable for data that doesn't benefit from partitioning.
    """
    TYPE = 'passthrough'
```
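
Since a passthrough-chunked symbol is stored without partitioning, its chunk listing should contain a single entry, which is a quick way to verify how data was stored. A sketch, assuming the `static_reference` symbol written in the examples below:

```python
# A passthrough-stored symbol round-trips as one chunk
ranges = list(chunk_lib.get_chunk_ranges('static_reference'))
print(len(ranges))  # expected: 1
```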
## Usage Examples

### Basic Chunked Storage

```python
from arctic import Arctic, CHUNK_STORE
from arctic.chunkstore.date_chunker import DateChunker
import pandas as pd
import numpy as np

# Setup chunk store
arctic_conn = Arctic('mongodb://localhost:27017')
arctic_conn.initialize_library('chunks', CHUNK_STORE)
chunk_lib = arctic_conn['chunks']

# Create large dataset
dates = pd.date_range('2020-01-01', periods=1000000, freq='min')
large_data = pd.DataFrame({
    'value1': np.random.randn(1000000),
    'value2': np.random.randn(1000000),
    'category': np.random.choice(['A', 'B', 'C'], 1000000)
}, index=dates)

# Write with date-based chunking
metadata = {'source': 'simulation', 'data_type': 'time_series'}
chunk_lib.write('large_dataset', large_data,
                metadata=metadata,
                chunker=DateChunker())
```
### Reading and Chunk Iteration

```python
from arctic.date import DateRange
from datetime import datetime

# Read entire dataset (automatically reconstructed from chunks)
full_data = chunk_lib.read('large_dataset')
print(f"Full dataset shape: {full_data.shape}")

# Read specific date range
jan_range = DateRange(datetime(2020, 1, 1), datetime(2020, 2, 1))
jan_data = chunk_lib.read('large_dataset', chunk_range=jan_range)
print(f"January data shape: {jan_data.shape}")

# Iterate over chunks; the iterator yields one data object per chunk,
# so each chunk can be processed individually to save memory
for chunk_data in chunk_lib.iterator('large_dataset'):
    print(f"Chunk shape: {chunk_data.shape}")

# Get chunk information (materialize the generator to count chunks)
chunks = list(chunk_lib.get_chunk_ranges('large_dataset'))
print(f"Total chunks: {len(chunks)}")
for chunk in chunks[:5]:  # First 5 chunks
    print(f"Chunk range: {chunk}")
```
### Append and Update Operations

```python
# Create additional data to append, starting after the existing range:
# the million-minute dataset above ends in late November 2021, and
# appended data must not overlap existing chunks
new_dates = pd.date_range('2021-11-26', periods=100000, freq='min')
new_data = pd.DataFrame({
    'value1': np.random.randn(100000),
    'value2': np.random.randn(100000),
    'category': np.random.choice(['A', 'B', 'C'], 100000)
}, index=new_dates)

# Append new data
chunk_lib.append('large_dataset', new_data,
                 audit={'operation': 'monthly_update', 'user': 'system'})

# Update specific chunk range
update_range = DateRange(datetime(2020, 1, 15), datetime(2020, 1, 16))
update_data = chunk_lib.read('large_dataset', chunk_range=update_range)
update_data['value1'] *= 1.1  # Apply 10% adjustment

chunk_lib.update('large_dataset', update_data,
                 chunk_range=update_range,
                 audit={'operation': 'correction', 'reason': 'data_adjustment'})
```
### Symbol Management and Metadata

```python
# List all symbols
symbols = chunk_lib.list_symbols()
print(f"Available symbols: {symbols}")

# Check if symbol exists
exists = chunk_lib.has_symbol('large_dataset')
print(f"Symbol exists: {exists}")

# Get detailed symbol information
info = chunk_lib.get_info('large_dataset')
print(f"Symbol info: {info}")

# Read metadata
metadata = chunk_lib.read_metadata('large_dataset')
print(f"Metadata: {metadata}")

# Update metadata
chunk_lib.write_metadata('large_dataset', {
    'source': 'simulation',
    'data_type': 'time_series',
    'last_updated': datetime.now().isoformat(),
    'version': '2.0'
})

# Rename symbol
chunk_lib.rename('large_dataset', 'historical_data',
                 audit={'operation': 'rename', 'reason': 'restructuring'})
```
### Audit Trail and Statistics

```python
# Read audit log; each entry carries the supplied audit fields
# (e.g. 'operation', 'user') plus details of the recorded action
audit_entries = chunk_lib.read_audit_log('historical_data')
for entry in audit_entries[-10:]:  # Last 10 entries
    print(entry)

# Get store statistics
stats = chunk_lib.stats()
print(f"Store statistics: {stats}")

# Clean up
chunk_lib.delete('historical_data',
                 audit={'operation': 'cleanup', 'reason': 'demo_complete'})
```
### Advanced Chunking Strategies

```python
from arctic.chunkstore.passthrough_chunker import PassthroughChunker

# Use passthrough chunker for non-time-series data
static_data = pd.DataFrame({
    'id': range(10000),
    'name': [f'item_{i}' for i in range(10000)],
    'value': np.random.randn(10000)
})

chunk_lib.write('static_reference', static_data,
                chunker=PassthroughChunker(),
                metadata={'type': 'reference_data'})

# Custom chunking parameters
chunk_lib.write('custom_chunks', large_data,
                chunker=DateChunker(),
                chunk_size='D',  # Daily chunks
                metadata={'chunking': 'daily'})
```