CtrlK
Blog | Docs | Log in | Get started
Tessl Logo

tessl/pypi-arctic

AHL Research Versioned TimeSeries and Tick store for high-performance financial data storage and analysis

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/version-store.md

Version Store Operations

Versioned storage for pandas DataFrames and Series with complete audit trails, point-in-time snapshots, and efficient data retrieval. Supports temporal data access, metadata management, and multi-version data handling optimized for financial time series data.

Capabilities

Symbol Management

Operations for managing symbols (data identifiers) within the version store, including listing and existence checking.

def list_symbols(self, all_symbols=False, snapshot=None, regex=None, **kwargs):
    """
    Return the names of the symbols held in the version store.

    Parameters:
    - all_symbols: When True, also include symbols from all snapshots
      (default: False)
    - snapshot: Restrict the listing to the symbols of this snapshot
    - regex: Keep only symbols whose name matches this regular expression
    - **kwargs: Extra filtering parameters

    Returns:
    List of symbol names
    """

def has_symbol(self, symbol, as_of=None):
    """
    Check if symbol exists, optionally at a given point in time.

    Parameters:
    - symbol: Symbol name to check
    - as_of: Point in time to check (default: latest). Accepts an int
      version number, a str snapshot name, or a datetime (the version
      that was current at that moment)

    Returns:
    bool: True if symbol exists
    """

Read Operations

Methods for retrieving versioned data and metadata with temporal filtering and version-specific access.

def read(self, symbol, as_of=None, date_range=None, from_version=None,
         allow_secondary=None, **kwargs):
    """
    Read symbol data with version and temporal filtering.

    Parameters:
    - symbol: Symbol name to read
    - as_of: Which version to read (default: latest). Accepts an int
      version number, a str snapshot name, or a datetime (the version
      that was current at that moment). This is the parameter to use for
      reading a specific version or a snapshot.
    - date_range: DateRange object; only the rows of datetime-indexed data
      that fall inside the range are returned
    - from_version: NOT a version selector. It is handed through to the
      storage handler as a read optimisation (used internally when
      appending); pass a version number via as_of instead.
    - allow_secondary: Allow reads from MongoDB secondary nodes; None keeps
      the setting of the Arctic connection
    - **kwargs: Additional read parameters passed to the storage handler

    Returns:
    VersionedItem: Object containing data, metadata, and version info

    Raises:
    - NoDataFoundException: If symbol or version doesn't exist
    """

def read_metadata(self, symbol, as_of=None, allow_secondary=None):
    """
    Read symbol metadata without loading data.

    Parameters:
    - symbol: Symbol name
    - as_of: Which version to read metadata for (default: latest). Accepts
      an int version number, a str snapshot name, or a datetime
    - allow_secondary: Allow reads from secondary nodes

    Returns:
    VersionedItem: Item whose data attribute is None; the metadata
    dictionary is available on its metadata attribute

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

Write Operations

Methods for storing and updating versioned data with metadata support and version management.

def write(self, symbol, data, metadata=None, prune_previous_version=True, **kwargs):
    """
    Store data under a symbol, creating a new version (overwrites the
    symbol's current content).

    Parameters:
    - symbol: Name of the symbol to write
    - data: Payload to store (pandas DataFrame/Series or numpy array)
    - metadata: Optional dictionary of metadata saved with the version
    - prune_previous_version: Drop the previous version to save space
    - **kwargs: Additional write parameters

    Returns:
    VersionedItem: Written data with version information

    Raises:
    - QuotaExceededException: If write would exceed storage quota
    - UnhandledDtypeException: If data type not supported
    """

def append(self, symbol, data, metadata=None, prune_previous_version=True, 
           upsert=True, **kwargs):
    """
    Add data to the end of an existing symbol, or create the symbol when it
    does not exist yet.

    Parameters:
    - symbol: Name of the symbol
    - data: Rows to append
    - metadata: Optional dictionary of metadata
    - prune_previous_version: Drop the previous version once the append is done
    - upsert: Create the symbol if it doesn't exist
    - **kwargs: Additional append parameters

    Returns:
    VersionedItem: Updated data with version information

    Raises:
    - OverlappingDataException: If appended data overlaps existing data
    - UnorderedDataException: If data not properly time-ordered
    """

def write_metadata(self, symbol, metadata, prune_previous_version=True, **kwargs):
    """
    Replace a symbol's metadata while leaving its data untouched.

    Parameters:
    - symbol: Name of the symbol
    - metadata: Dictionary of metadata to store
    - prune_previous_version: Drop the previous version
    - **kwargs: Additional parameters

    Returns:
    VersionedItem: Symbol data with updated metadata
    """

Version Management

Operations for managing multiple versions of data including listing, restoration, and cleanup.

def list_versions(self, symbol=None, snapshot=None, latest_only=False):
    """
    Enumerate the stored versions of one symbol or of every symbol.

    Parameters:
    - symbol: Name of a single symbol (default: all symbols)
    - snapshot: Restrict the listing to versions in this snapshot
    - latest_only: When True, report only the newest version per symbol

    Returns:
    List of version information dictionaries
    """

def restore_version(self, symbol, as_of, prune_previous_version=True):
    """
    Bring a symbol back to one of its earlier versions.

    Parameters:
    - symbol: Name of the symbol to restore
    - as_of: Datetime or version number identifying the version to restore
    - prune_previous_version: Drop the current version once restored

    Returns:
    VersionedItem: Restored data with version information

    Raises:
    - NoDataFoundException: If version doesn't exist
    """

def delete(self, symbol):
    """
    Delete symbol and all of its versions that are not part of a snapshot.

    Versions that are still referenced by a snapshot are retained, so the
    symbol stays readable through that snapshot until the snapshot itself
    is deleted.

    Parameters:
    - symbol: Symbol name to delete

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

Snapshot Management

Creating and managing named snapshots for point-in-time data consistency across multiple symbols.

def snapshot(self, snap_name, metadata=None, skip_symbols=None, versions=None):
    """
    Capture the current state of the data under a snapshot name.

    Parameters:
    - snap_name: Name to give the snapshot
    - metadata: Optional metadata stored with the snapshot
    - skip_symbols: Symbols to leave out of the snapshot (list)
    - versions: Specific versions to include (dict: symbol -> version)

    Returns:
    Snapshot information dictionary

    Raises:
    - DuplicateSnapshotException: If snapshot name already exists
    """

def delete_snapshot(self, snap_name):
    """
    Remove a snapshot identified by its name.

    Parameters:
    - snap_name: Name of the snapshot to remove

    Raises:
    - NoDataFoundException: If snapshot doesn't exist
    """

def list_snapshots(self):
    """
    List all available snapshots.

    Returns:
    dict: Mapping of snapshot name -> the metadata stored with that
    snapshot (iterate with .items() to get both)
    """

Information and Audit

Methods for retrieving detailed information about symbols, versions, and audit trails.

def get_info(self, symbol, as_of=None):
    """
    Get detailed information about symbol.

    Parameters:
    - symbol: Symbol name
    - as_of: Which version to describe (default: latest). Accepts an int
      version number, a str snapshot name, or a datetime (the version
      that was current at that moment)

    Returns:
    dict: Comprehensive symbol information including size, versions, metadata

    Raises:
    - NoDataFoundException: If symbol doesn't exist
    """

def get_arctic_version(self, symbol, as_of=None):
    """
    Get Arctic version used to store symbol.

    Parameters:
    - symbol: Symbol name
    - as_of: Which stored version of the symbol to inspect (default:
      latest). Accepts an int version number, a str snapshot name, or a
      datetime (the version that was current at that moment)

    Returns:
    str: Arctic version string
    """

def read_audit_log(self, symbol=None, message=None):
    """
    Fetch the audit trail of recorded operations.

    Parameters:
    - symbol: Only return entries for this symbol (default: all)
    - message: Only return entries matching this message content

    Returns:
    List of audit log entries
    """

def stats(self):
    """
    Report statistics about the version store.

    Returns:
    dict: Store statistics including symbol counts, storage usage, etc.
    """

Types

VersionedItem

Container for versioned data with metadata and version information.

class VersionedItem:
    """
    Holder for one version of a symbol: the data together with its
    descriptive fields.

    Attributes:
    - symbol: Symbol name
    - library: Library reference
    - data: Actual data (pandas DataFrame/Series, numpy array, etc.)
    - version: Version number
    - metadata: Metadata dictionary
    - host: Host information
    """

    def __init__(self, symbol, library, data, version, metadata, host=None):
        """
        Build a versioned item.

        Parameters:
        - symbol: Name of the symbol
        - library: Library reference
        - data: Data payload
        - version: Version identifier
        - metadata: Dictionary of metadata
        - host: Host information (optional)
        """

    def metadata_dict(self):
        """
        Return the item's metadata in dictionary form.

        Returns:
        dict: Complete metadata information
        """

Usage Examples

Basic Read/Write Operations

from arctic import Arctic, VERSION_STORE
import pandas as pd
import numpy as np

# Setup: connect to MongoDB (assumes a server is reachable on
# localhost:27017), create a VERSION_STORE library called 'prices' and
# fetch a handle to it. `lib` is reused by the later examples.
arctic_conn = Arctic('mongodb://localhost:27017')
arctic_conn.initialize_library('prices', VERSION_STORE)
lib = arctic_conn['prices']

# Create sample data: 1000 one-minute rows starting 2020-01-01 with a
# random-walk price around 100 and a random integer volume.
# `data` is reused by the later examples.
dates = pd.date_range('2020-01-01', periods=1000, freq='min')
data = pd.DataFrame({
    'price': np.random.randn(1000).cumsum() + 100,
    'volume': np.random.randint(100, 1000, 1000)
}, index=dates)

# Write data with metadata (the metadata dict is stored alongside the data)
metadata = {'source': 'market_feed', 'currency': 'USD'}
lib.write('AAPL', data, metadata=metadata)

# Read data back: the result exposes the payload, the metadata and the
# version number as attributes
result = lib.read('AAPL')
print(f"Data shape: {result.data.shape}")
print(f"Metadata: {result.metadata}")
print(f"Version: {result.version}")

Version Management

# Create multiple versions (each write/append produces a new version)
lib.write('AAPL', data[:500], metadata={'note': 'partial data'})
lib.append('AAPL', data[500:], metadata={'note': 'complete data'})

# List all versions
versions = lib.list_versions('AAPL')
for version in versions:
    print(f"Version {version['version']}: {version['date']}")

# Read specific version: the version is selected with as_of (an int is a
# version number). from_version is not a version selector.
old_data = lib.read('AAPL', as_of=1)
print(f"Version 1 shape: {old_data.data.shape}")

# Restore to previous version
lib.restore_version('AAPL', as_of=1)

Snapshot Operations

# Write multiple symbols
symbols = ['AAPL', 'GOOGL', 'MSFT']
for symbol in symbols:
    symbol_data = data * np.random.uniform(0.8, 1.2)  # Simulate different prices
    lib.write(symbol, symbol_data)

# Create snapshot
lib.snapshot('end_of_day_2020', metadata={'note': 'EOD snapshot'})

# List snapshots: the result maps each snapshot name to the metadata stored
# with it, so iterate over the items rather than indexing each entry
snapshots = lib.list_snapshots()
for snap_name, snap_metadata in snapshots.items():
    print(f"Snapshot: {snap_name}, Metadata: {snap_metadata}")

# Read from snapshot: read() has no `snapshot` parameter; a snapshot is
# addressed by passing its name as as_of
snap_symbols = lib.list_symbols(snapshot='end_of_day_2020')
snap_data = lib.read('AAPL', as_of='end_of_day_2020')

Temporal Data Access

from arctic.date import DateRange
from datetime import datetime

# Read data for specific date range (filters rows by their index timestamps)
date_filter = DateRange(datetime(2020, 1, 1), datetime(2020, 1, 31))
jan_data = lib.read('AAPL', date_range=date_filter)
print(f"January data: {jan_data.data.shape}")

# Read as of specific time. as_of selects the version that was current at
# that wall-clock moment (i.e. by when it was written), not rows by their
# index timestamps, so the moment must not predate the first write of the
# symbol -- a date in 2020 would find no version for data written today.
as_of_time = datetime.now()
as_of_data = lib.read('AAPL', as_of=as_of_time)
print(f"Data as of {as_of_time}: {as_of_data.data.shape}")

# Append with date range validation
new_data = pd.DataFrame({
    'price': [105.0, 106.0],
    'volume': [1200, 1300]
}, index=pd.date_range('2020-02-01', periods=2, freq='min'))

lib.append('AAPL', new_data)

Audit and Information

# Get detailed symbol information
info = lib.get_info('AAPL')
print(f"Symbol info: {info}")

# Check Arctic version
version = lib.get_arctic_version('AAPL')
print(f"Stored with Arctic version: {version}")

# Read audit log
# NOTE(review): assumes each audit entry is a dict with 'date' and 'message'
# keys, and that the list is ordered oldest-first so that [-5:] is the most
# recent five -- neither is shown on this page; confirm against the library.
audit_entries = lib.read_audit_log('AAPL')
for entry in audit_entries[-5:]:  # Last 5 entries
    print(f"{entry['date']}: {entry['message']}")

# Get store statistics
stats = lib.stats()
print(f"Store stats: {stats}")

Install with Tessl CLI

npx tessl i tessl/pypi-arctic

docs

arctic-connection.md

async-operations.md

bson-store.md

chunk-store.md

date-utilities.md

index.md

tick-store.md

version-store.md

tile.json