AHL Research versioned TimeSeries and Tick store for high-performance financial data storage and analysis.

Quality: pending — best-practices review has not yet been performed.
Impact: pending — no eval scenarios have been run.

High-frequency tick data store optimized for financial market data, built on efficient columnar compression and time/date-based partitioning. It supports initial images (opening state), metadata persistence, and date-range queries for fast access to large volumes of timestamped market data.
class TickStore:
    """
    Tick-specific storage optimized for high-frequency financial data.

    Provides columnar storage for tick data with efficient compression,
    supports initial images for maintaining state, and enables fast
    date range queries for market data analysis.

    NOTE(review): this is an API stub (interface documentation only);
    method bodies are intentionally empty.
    """

    def __init__(self, arctic_lib, chunk_size=100000):
        """
        Initialize TickStore with a configurable chunk size.

        Parameters:
        - arctic_lib: ArcticLibraryBinding instance
        - chunk_size: Number of ticks per chunk (default: 100,000)
        """
        ...

    # --- Symbol management: listing, deletion, existence checking ---

    def list_symbols(self, date_range=None):
        """
        List available symbols, optionally filtered by date range.

        Parameters:
        - date_range: DateRange object to filter symbols by data availability

        Returns:
        List of symbol names that have tick data
        """
        ...

    def delete(self, symbol, date_range=None):
        """
        Delete a symbol or a specific date range of its tick data.

        Parameters:
        - symbol: Symbol name to delete
        - date_range: Specific date range to delete (default: all data)

        Raises:
        - NoDataFoundException: If symbol doesn't exist
        """
        ...

    # --- Reading: date filtering, column selection, image handling ---

    def read(self, symbol, date_range=None, columns=None, include_images=False,
             allow_secondary=None, **kwargs):
        """
        Read tick data with flexible filtering and column selection.

        Parameters:
        - symbol: Symbol name to read
        - date_range: DateRange object for temporal filtering
        - columns: List of columns to return (default: all columns)
        - include_images: Include initial images in results (default: False)
        - allow_secondary: Allow reads from MongoDB secondary nodes
        - **kwargs: Additional read parameters

        Returns:
        pandas.DataFrame: Tick data with timestamp index

        Raises:
        - NoDataFoundException: If symbol or date range has no data
        """
        ...

    def read_metadata(self, symbol):
        """
        Read symbol metadata without loading tick data.

        Parameters:
        - symbol: Symbol name

        Returns:
        dict: Symbol metadata including data statistics

        Raises:
        - NoDataFoundException: If symbol doesn't exist
        """
        ...

    # --- Writing: initial image support and metadata persistence ---

    def write(self, symbol, data, initial_image=None, metadata=None):
        """
        Write tick data with an optional initial image and metadata.

        Parameters:
        - symbol: Symbol name to write
        - data: Tick data as pandas DataFrame with timestamp index
        - initial_image: Dictionary representing initial state/image
        - metadata: Optional metadata dictionary

        Raises:
        - OverlappingDataException: If data overlaps with existing ticks
        - UnorderedDataException: If data not properly time-ordered
        - UnhandledDtypeException: If data contains unsupported types
        """
        ...

    # --- Date boundaries and data availability ---

    def max_date(self, symbol):
        """
        Get the maximum (latest) date for a symbol's tick data.

        Parameters:
        - symbol: Symbol name

        Returns:
        datetime: Latest timestamp in the data

        Raises:
        - NoDataFoundException: If symbol has no data
        """
        ...

    def min_date(self, symbol):
        """
        Get the minimum (earliest) date for a symbol's tick data.

        Parameters:
        - symbol: Symbol name

        Returns:
        datetime: Earliest timestamp in the data

        Raises:
        - NoDataFoundException: If symbol has no data
        """
        ...

    # --- Storage statistics and performance information ---

    def stats(self):
        """
        Get tick store statistics and performance metrics.

        Returns:
        dict: Statistics including symbol counts, storage usage, chunk info
        """
        ...
from arctic import Arctic, TICK_STORE
import pandas as pd
import numpy as np
from datetime import datetime

# Set up the tick store library.
arctic_conn = Arctic('mongodb://localhost:27017')
arctic_conn.initialize_library('ticks', TICK_STORE)
tick_lib = arctic_conn['ticks']

# Create sample tick data: one trading session (09:30-16:00) at 100ms resolution.
timestamps = pd.date_range('2020-01-01 09:30:00',
                           '2020-01-01 16:00:00',
                           freq='100ms')
tick_data = pd.DataFrame({
    'bid': np.random.uniform(99.95, 100.05, len(timestamps)),
    'ask': np.random.uniform(100.00, 100.10, len(timestamps)),
    'bid_size': np.random.randint(100, 1000, len(timestamps)),
    'ask_size': np.random.randint(100, 1000, len(timestamps)),
    'trade_price': np.random.uniform(99.98, 100.08, len(timestamps)),
    'trade_size': np.random.randint(100, 5000, len(timestamps))
}, index=timestamps)

# Write tick data with an initial image (session opening state) and metadata.
initial_image = {
    'opening_price': 100.00,
    'previous_close': 99.95,
    'session_date': '2020-01-01'
}
metadata = {
    'exchange': 'NYSE',
    'symbol_type': 'equity',
    'currency': 'USD'
}
tick_lib.write('AAPL', tick_data,
               initial_image=initial_image,
               metadata=metadata)

from arctic.date import DateRange
# Read all tick data for a symbol.
all_ticks = tick_lib.read('AAPL')
print(f"Total ticks: {len(all_ticks)}")

# Read a specific time range (morning session).
morning_range = DateRange(
    datetime(2020, 1, 1, 9, 30),
    datetime(2020, 1, 1, 12, 0)
)
morning_ticks = tick_lib.read('AAPL', date_range=morning_range)
print(f"Morning ticks: {len(morning_ticks)}")

# Read specific columns only.
price_data = tick_lib.read('AAPL',
                           columns=['bid', 'ask', 'trade_price'])
print(f"Price columns: {list(price_data.columns)}")

# Read with initial images.
# Images are included as special rows in the DataFrame.
ticks_with_images = tick_lib.read('AAPL', include_images=True)

# Get data boundaries.
earliest = tick_lib.min_date('AAPL')
latest = tick_lib.max_date('AAPL')
print(f"Data range: {earliest} to {latest}")

# List symbols with data in a specific date range.
symbols_today = tick_lib.list_symbols(
    date_range=DateRange(datetime(2020, 1, 1), datetime(2020, 1, 2))
)
print(f"Symbols with data today: {symbols_today}")

# Check metadata.
metadata = tick_lib.read_metadata('AAPL')
print(f"Symbol metadata: {metadata}")
# Write data for multiple symbols.
symbols = ['AAPL', 'GOOGL', 'MSFT']
for i, symbol in enumerate(symbols):
    # Generate different data for each symbol.
    symbol_ticks = tick_data * (1 + i * 0.1)  # Price variation
    symbol_ticks.index = timestamps  # Same time range
    tick_lib.write(symbol, symbol_ticks, metadata={
        'symbol': symbol,
        'sector': 'technology',
        'listing': 'NASDAQ' if symbol != 'AAPL' else 'NYSE'
    })

# List all available symbols.
all_symbols = tick_lib.list_symbols()
print(f"Available symbols: {all_symbols}")

# Get statistics.
stats = tick_lib.stats()
print(f"Tick store statistics: {stats}")
# Append additional tick data (must be non-overlapping with existing data).
additional_timestamps = pd.date_range('2020-01-01 16:00:01',
                                      '2020-01-01 17:00:00',
                                      freq='100ms')
additional_ticks = pd.DataFrame({
    'bid': np.random.uniform(100.00, 100.10, len(additional_timestamps)),
    'ask': np.random.uniform(100.05, 100.15, len(additional_timestamps)),
    'bid_size': np.random.randint(100, 1000, len(additional_timestamps)),
    'ask_size': np.random.randint(100, 1000, len(additional_timestamps)),
    'trade_price': np.random.uniform(100.03, 100.13, len(additional_timestamps)),
    'trade_size': np.random.randint(100, 5000, len(additional_timestamps))
}, index=additional_timestamps)
# Note: TickStore doesn't have an append method, so you would typically
# write new data to a different date range or use update operations.

# Delete a specific date range.
lunch_range = DateRange(
    datetime(2020, 1, 1, 12, 0),
    datetime(2020, 1, 1, 13, 30)
)
tick_lib.delete('AAPL', date_range=lunch_range)

# Verify the deletion.
remaining_ticks = tick_lib.read('AAPL')
print(f"Ticks after deletion: {len(remaining_ticks)}")
# Use a custom chunk size for better performance.
from arctic.tickstore import TickStore

# Larger chunks for less frequent access.
large_chunk_lib = TickStore(arctic_conn['ticks'], chunk_size=500000)
# Smaller chunks for more frequent random access.
small_chunk_lib = TickStore(arctic_conn['ticks'], chunk_size=50000)

# Read from a MongoDB secondary for analytics (doesn't need the latest data).
analytics_data = tick_lib.read('AAPL',
                               date_range=morning_range,
                               allow_secondary=True)

# Column filtering for bandwidth efficiency.
trades_only = tick_lib.read('AAPL',
                            columns=['trade_price', 'trade_size'])

# Install with the Tessl CLI: npx tessl i tessl/pypi-arctic