- Spec files
pypi-streamlit
Describes: pkg:pypi/streamlit@1.50.x
- Description
- A faster way to build and share data apps
- Author
- tessl
- Last updated
docs/caching-performance.md
# Caching and Performance

Caching decorators and performance optimization tools for efficient data processing and resource management. Streamlit's caching system enables applications to avoid expensive recomputations and resource loading.

## Capabilities

### Data Caching

Cache expensive data computations that can be serialized and shared across sessions.

```python { .api }
def cache_data(func=None, *, ttl=None, max_entries=None, show_spinner=True, persist=None, experimental_allow_widgets=False, hash_funcs=None, validate=None):
    """
    Decorator to cache functions that return serializable data.

    Args:
        func (callable, optional): Function to cache (when used as decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool): Whether to show spinner during computation
        persist (str, optional): Persistence mode ("disk" for persistent storage)
        experimental_allow_widgets (bool): Allow widgets in cached functions
        hash_funcs (dict, optional): Custom hash functions for parameter types
        validate (callable, optional): Function to validate cached values

    Returns:
        callable: Decorated function with caching capability
    """
```

Example usage:
```python
@st.cache_data
def load_data(file_path):
    """Load and process data file."""
    df = pd.read_csv(file_path)
    return df.groupby('category').sum()

# Cache with TTL (expires after 1 hour)
@st.cache_data(ttl=3600)
def fetch_api_data(endpoint, params):
    """Fetch data from API with 1-hour cache."""
    response = requests.get(endpoint, params=params)
    return response.json()

# Cache with persistence (survives app restarts)
@st.cache_data(persist="disk")
def expensive_computation(data, algorithm):
    """Expensive ML computation with disk persistence."""
    model = train_model(data, algorithm)
    return model.predictions

# Cache with custom validation
@st.cache_data(validate=lambda x: len(x) > 0)
def get_user_data(user_id):
    """Get user data with validation."""
    return database.fetch_user(user_id)

# Cache with max entries limit
@st.cache_data(max_entries=100)
def process_query(query, filters):
    """Process search query with LRU eviction."""
    return search_engine.process(query, filters)
```

### Resource Caching

Cache global resources like database connections, models, and objects that cannot be serialized.

```python { .api }
def cache_resource(func=None, *, ttl=None, max_entries=None, show_spinner=True, validate=None, hash_funcs=None):
    """
    Decorator to cache functions that return non-serializable resources.

    Args:
        func (callable, optional): Function to cache (when used as decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool): Whether to show spinner during computation
        validate (callable, optional): Function to validate cached resources
        hash_funcs (dict, optional): Custom hash functions for parameter types

    Returns:
        callable: Decorated function with resource caching capability
    """
```

Example usage:
```python
@st.cache_resource
def get_database_connection():
    """Create database connection (shared across sessions)."""
    return sqlite3.connect("app.db", check_same_thread=False)

@st.cache_resource
def load_ml_model(model_path):
    """Load ML model (expensive, non-serializable)."""
    import tensorflow as tf
    return tf.keras.models.load_model(model_path)

# Resource with TTL (model refreshes daily)
@st.cache_resource(ttl=86400)
def get_trained_model(training_data_hash):
    """Load or train model with daily refresh."""
    return train_model(training_data_hash)

# Resource with validation
@st.cache_resource(validate=lambda conn: conn.is_connected())
def get_api_client(api_key):
    """Get API client with connection validation."""
    return APIClient(api_key)

# Limited resource cache
@st.cache_resource(max_entries=5)
def create_processor(config):
    """Create data processor (max 5 configurations cached)."""
    return DataProcessor(config)
```

### Legacy Caching (Deprecated)

The original caching function, now deprecated in favor of `cache_data` and `cache_resource`.

```python { .api }
def cache(func=None, persist=False, allow_output_mutation=False, show_spinner=True, suppress_st_warning=False, hash_funcs=None, max_entries=None, ttl=None):
    """
    Legacy caching decorator (deprecated).

    Args:
        func (callable, optional): Function to cache
        persist (bool): Whether to persist cache to disk
        allow_output_mutation (bool): Allow mutation of cached return values
        show_spinner (bool): Whether to show spinner during computation
        suppress_st_warning (bool): Suppress Streamlit warnings
        hash_funcs (dict, optional): Custom hash functions
        max_entries (int, optional): Maximum number of cached entries
        ttl (float, optional): Time-to-live in seconds

    Returns:
        callable: Decorated function with caching

    Note:
        Deprecated. Use st.cache_data or st.cache_resource instead.
    """
```

### Performance Optimization Patterns

#### Data Loading Optimization

```python
# Cache expensive data loading
@st.cache_data
def load_large_dataset(data_source):
    """Load and preprocess large dataset."""
    df = pd.read_parquet(data_source)  # Fast format
    df = df.fillna(0)  # Preprocessing
    return df

# Cache with parameters
@st.cache_data
def filter_data(df, category, date_range):
    """Filter dataset based on parameters."""
    mask = (df['category'] == category) & \
           (df['date'].between(date_range[0], date_range[1]))
    return df[mask]

# Usage with cached functions
data = load_large_dataset("data.parquet")
filtered_data = filter_data(data, selected_category, date_range)
```

#### Model and Resource Management

```python
# Cache ML models
@st.cache_resource
def load_prediction_model():
    """Load trained model for predictions."""
    return joblib.load("model.pkl")

@st.cache_resource
def get_feature_encoder():
    """Load feature preprocessing pipeline."""
    return joblib.load("encoder.pkl")

# Cache database connections
@st.cache_resource
def init_database():
    """Initialize database connection pool."""
    return ConnectionPool(
        host="localhost",
        database="myapp",
        max_connections=10
    )

# Usage pattern
model = load_prediction_model()
encoder = get_feature_encoder()
db = init_database()

# Now use these cached resources
features = encoder.transform(user_input)
prediction = model.predict(features)
```

#### API and External Service Caching

```python
# Cache API calls with TTL
@st.cache_data(ttl=300)  # 5 minutes
def fetch_stock_prices(symbols):
    """Fetch current stock prices (cached for 5 minutes)."""
    api_key = st.secrets["stock_api_key"]
    response = requests.get(f"https://api.stocks.com/prices",
                            params={"symbols": ",".join(symbols), "key": api_key})
    return response.json()

@st.cache_data(ttl=3600)  # 1 hour
def get_weather_data(location):
    """Fetch weather data (cached for 1 hour)."""
    api_key = st.secrets["weather_api_key"]
    response = requests.get(f"https://api.weather.com/current",
                            params={"location": location, "key": api_key})
    return response.json()

# Usage with error handling
try:
    weather = get_weather_data(user_location)
    st.metric("Temperature", f"{weather['temp']}°F")
except Exception as e:
    st.error(f"Could not fetch weather data: {e}")
```

#### Custom Hash Functions

```python
# Custom hash for complex objects
def hash_dataframe(df):
    """Custom hash function for pandas DataFrames."""
    return hash(pd.util.hash_pandas_object(df).sum())

@st.cache_data(hash_funcs={pd.DataFrame: hash_dataframe})
def process_dataframe(df, operations):
    """Process DataFrame with custom hashing."""
    result = df.copy()
    for op in operations:
        result = apply_operation(result, op)
    return result

# Custom hash for file objects
def hash_file(file_obj):
    """Hash file based on content."""
    if hasattr(file_obj, 'name'):
        return hash((file_obj.name, os.path.getmtime(file_obj.name)))
    return hash(file_obj.read())

@st.cache_data(hash_funcs={type(open(__file__)): hash_file})
def process_uploaded_file(file):
    """Process uploaded file with content-based hashing."""
    return pd.read_csv(file)
```

#### Cache Management

```python
# Clear specific cache
@st.cache_data
def expensive_function(param):
    return compute_result(param)

# Clear cache manually
if st.button("Clear Cache"):
    expensive_function.clear()
    st.success("Cache cleared!")

# Clear all caches
if st.button("Clear All Caches"):
    st.cache_data.clear()
    st.cache_resource.clear()
    st.success("All caches cleared!")

# Conditional cache clearing
if st.checkbox("Force Refresh"):
    expensive_function.clear()
    result = expensive_function(user_input)
else:
    result = expensive_function(user_input)
```

#### Performance Monitoring

```python
import time
import streamlit as st

# Monitor cache performance
@st.cache_data
def monitored_function(data):
    start_time = time.time()
    result = expensive_computation(data)
    end_time = time.time()

    # Log performance metrics
    st.sidebar.metric("Computation Time", f"{end_time - start_time:.2f}s")
    return result

# Cache hit/miss tracking
cache_stats = {"hits": 0, "misses": 0}

@st.cache_data
def tracked_function(param):
    cache_stats["misses"] += 1
    return compute_result(param)

# Display cache statistics
col1, col2 = st.sidebar.columns(2)
col1.metric("Cache Hits", cache_stats["hits"])
col2.metric("Cache Misses", cache_stats["misses"])
```

### Best Practices

#### When to Use Each Cache Type

**Use `@st.cache_data` for:**
- Data loading from files, APIs, or databases
- Data transformations and computations
- Serializable objects (DataFrames, lists, dicts, numbers, strings)
- Results that can be safely shared across users

**Use `@st.cache_resource` for:**
- Database connections and connection pools
- ML models and trained algorithms
- File handles and open resources
- Objects with locks or threads
- Non-serializable or stateful objects

#### Cache Configuration Guidelines

```python
# For frequently accessed, stable data
@st.cache_data(persist="disk")
def load_reference_data():
    return pd.read_csv("reference.csv")

# For real-time data with appropriate TTL
@st.cache_data(ttl=60)  # 1 minute
def get_live_metrics():
    return fetch_current_metrics()

# For user-specific data with size limits
@st.cache_data(max_entries=1000)
def get_user_analysis(user_id, analysis_type):
    return perform_analysis(user_id, analysis_type)

# For expensive resources with validation
@st.cache_resource(validate=lambda x: x.is_healthy())
def get_ml_service():
    return MLService()
```