pypi-streamlit

Description: A faster way to build and share data apps
Author: tessl

How to use:

```
npx @tessl/cli registry install tessl/pypi-streamlit@1.50.0
```

docs/caching-performance.md

# Caching and Performance

Caching decorators and performance-optimization tools for efficient data processing and resource management. Streamlit's caching system lets applications avoid repeating expensive computations and resource loading on every script rerun.
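
As a quick illustration (a minimal sketch; `slow_square` stands in for any expensive call), a cached function's body runs once per unique set of arguments, and later reruns reuse the stored result:

```python
import time

import streamlit as st

@st.cache_data
def slow_square(n):
    """Pretend-expensive computation."""
    time.sleep(2)  # Stands in for a slow query or computation
    return n * n

st.write(slow_square(4))  # Body runs once; subsequent reruns return instantly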

## Capabilities

### Data Caching

Cache expensive data computations whose results can be serialized and shared across sessions.

```python { .api }
def cache_data(func=None, *, ttl=None, max_entries=None, show_spinner=True, persist=None, experimental_allow_widgets=False, hash_funcs=None):
    """
    Decorator to cache functions that return serializable data.

    Args:
        func (callable, optional): Function to cache (when used as a bare decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool or str): Whether to show a spinner during computation; a string sets the spinner text
        persist (str, optional): Persistence mode ("disk" for persistent storage)
        experimental_allow_widgets (bool): Allow widgets in cached functions (deprecated)
        hash_funcs (dict, optional): Custom hash functions for parameter types

    Returns:
        callable: Decorated function with caching capability
    """
```

Example usage:
```python
import pandas as pd
import requests
import streamlit as st

@st.cache_data
def load_data(file_path):
    """Load and process data file."""
    df = pd.read_csv(file_path)
    return df.groupby('category').sum()

# Cache with TTL (expires after 1 hour)
@st.cache_data(ttl=3600)
def fetch_api_data(endpoint, params):
    """Fetch data from API with 1-hour cache."""
    response = requests.get(endpoint, params=params)
    return response.json()

# Cache with persistence (survives app restarts)
@st.cache_data(persist="disk")
def expensive_computation(data, algorithm):
    """Expensive ML computation with disk persistence."""
    model = train_model(data, algorithm)  # train_model defined elsewhere
    return model.predictions

# Cache with a custom spinner message
@st.cache_data(show_spinner="Fetching user data...")
def get_user_data(user_id):
    """Get user data with a descriptive loading spinner."""
    return database.fetch_user(user_id)  # database defined elsewhere

# Cache with max entries limit (oldest entries evicted when full)
@st.cache_data(max_entries=100)
def process_query(query, filters):
    """Process search query with a bounded cache."""
    return search_engine.process(query, filters)  # search_engine defined elsewhere
```

### Resource Caching

Cache global resources such as database connections, ML models, and other objects that cannot be serialized.

```python { .api }
def cache_resource(func=None, *, ttl=None, max_entries=None, show_spinner=True, validate=None, hash_funcs=None):
    """
    Decorator to cache functions that return non-serializable resources.

    Args:
        func (callable, optional): Function to cache (when used as a bare decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool or str): Whether to show a spinner during computation; a string sets the spinner text
        validate (callable, optional): Function that receives the cached resource and returns True if it is still valid
        hash_funcs (dict, optional): Custom hash functions for parameter types

    Returns:
        callable: Decorated function with resource caching capability
    """
```

Example usage:
```python
import sqlite3

import streamlit as st

@st.cache_resource
def get_database_connection():
    """Create database connection (shared across all sessions)."""
    return sqlite3.connect("app.db", check_same_thread=False)

@st.cache_resource
def load_ml_model(model_path):
    """Load ML model (expensive, non-serializable)."""
    import tensorflow as tf
    return tf.keras.models.load_model(model_path)

# Resource with TTL (model refreshes daily)
@st.cache_resource(ttl=86400)
def get_trained_model(training_data_hash):
    """Load or train model with daily refresh."""
    return train_model(training_data_hash)  # train_model defined elsewhere

# Resource with validation (failed validation triggers recreation)
@st.cache_resource(validate=lambda client: client.is_connected())
def get_api_client(api_key):
    """Get API client with connection validation."""
    return APIClient(api_key)  # APIClient defined elsewhere

# Limited resource cache
@st.cache_resource(max_entries=5)
def create_processor(config):
    """Create data processor (at most 5 configurations cached)."""
    return DataProcessor(config)  # DataProcessor defined elsewhere
```

### Legacy Caching (Deprecated)

The original caching decorator, now deprecated in favor of `st.cache_data` and `st.cache_resource`.

```python { .api }
def cache(func=None, persist=False, allow_output_mutation=False, show_spinner=True, suppress_st_warning=False, hash_funcs=None, max_entries=None, ttl=None):
    """
    Legacy caching decorator (deprecated).

    Args:
        func (callable, optional): Function to cache
        persist (bool): Whether to persist cache to disk
        allow_output_mutation (bool): Allow mutation of cached return values
        show_spinner (bool): Whether to show spinner during computation
        suppress_st_warning (bool): Suppress Streamlit warnings
        hash_funcs (dict, optional): Custom hash functions
        max_entries (int, optional): Maximum number of cached entries
        ttl (float, optional): Time-to-live in seconds

    Returns:
        callable: Decorated function with caching

    Note:
        Deprecated. Use st.cache_data or st.cache_resource instead.
    """
```
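
For reference, a minimal migration sketch (assuming a Streamlit version that still ships `st.cache`; `load_table` and the SQLAlchemy engine are illustrative):

```python
import pandas as pd
import streamlit as st

# Before (deprecated): emits a deprecation warning on modern Streamlit
@st.cache(persist=True, show_spinner=False)
def load_table(path):
    return pd.read_csv(path)

# After: serializable results move to cache_data...
@st.cache_data(persist="disk", show_spinner=False)
def load_table_v2(path):
    return pd.read_csv(path)

# ...and non-serializable objects (the old allow_output_mutation=True
# use case) move to cache_resource
@st.cache_resource
def get_engine(url):
    import sqlalchemy
    return sqlalchemy.create_engine(url)
```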

### Performance Optimization Patterns

#### Data Loading Optimization

```python
import pandas as pd
import streamlit as st

# Cache expensive data loading
@st.cache_data
def load_large_dataset(data_source):
    """Load and preprocess large dataset."""
    df = pd.read_parquet(data_source)  # Columnar format loads faster than CSV
    df = df.fillna(0)  # Preprocessing
    return df

# Cache with parameters: each distinct argument combination is cached separately
@st.cache_data
def filter_data(df, category, date_range):
    """Filter dataset based on parameters."""
    mask = (
        (df['category'] == category)
        & (df['date'].between(date_range[0], date_range[1]))
    )
    return df[mask]

# Usage with cached functions
data = load_large_dataset("data.parquet")
filtered_data = filter_data(data, selected_category, date_range)
```

#### Model and Resource Management

```python
import joblib
import streamlit as st

# Cache ML models
@st.cache_resource
def load_prediction_model():
    """Load trained model for predictions."""
    return joblib.load("model.pkl")

@st.cache_resource
def get_feature_encoder():
    """Load feature preprocessing pipeline."""
    return joblib.load("encoder.pkl")

# Cache database connections
@st.cache_resource
def init_database():
    """Initialize database connection pool."""
    return ConnectionPool(  # ConnectionPool defined elsewhere
        host="localhost",
        database="myapp",
        max_connections=10,
    )

# Usage pattern: these calls are cheap after the first run
model = load_prediction_model()
encoder = get_feature_encoder()
db = init_database()

# Now use the cached resources
features = encoder.transform(user_input)
prediction = model.predict(features)
```

#### API and External Service Caching

```python
import requests
import streamlit as st

# Cache API calls with TTL
@st.cache_data(ttl=300)  # 5 minutes
def fetch_stock_prices(symbols):
    """Fetch current stock prices (cached for 5 minutes)."""
    api_key = st.secrets["stock_api_key"]
    response = requests.get(
        "https://api.stocks.com/prices",
        params={"symbols": ",".join(symbols), "key": api_key},
    )
    return response.json()

@st.cache_data(ttl=3600)  # 1 hour
def get_weather_data(location):
    """Fetch weather data (cached for 1 hour)."""
    api_key = st.secrets["weather_api_key"]
    response = requests.get(
        "https://api.weather.com/current",
        params={"location": location, "key": api_key},
    )
    return response.json()

# Usage with error handling
try:
    weather = get_weather_data(user_location)
    st.metric("Temperature", f"{weather['temp']}°F")
except Exception as e:
    st.error(f"Could not fetch weather data: {e}")
```

#### Custom Hash Functions

```python
import io
import os

import pandas as pd
import streamlit as st

# Custom hash for complex objects
def hash_dataframe(df):
    """Custom hash function for pandas DataFrames."""
    return hash(pd.util.hash_pandas_object(df).sum())

@st.cache_data(hash_funcs={pd.DataFrame: hash_dataframe})
def process_dataframe(df, operations):
    """Process DataFrame with custom hashing."""
    result = df.copy()
    for op in operations:
        result = apply_operation(result, op)  # apply_operation defined elsewhere
    return result

# Custom hash for file objects
def hash_file(file_obj):
    """Hash a file by name and modification time, falling back to content."""
    if hasattr(file_obj, 'name'):
        return hash((file_obj.name, os.path.getmtime(file_obj.name)))
    return hash(file_obj.read())

# io.TextIOWrapper is the type returned by open() in text mode
@st.cache_data(hash_funcs={io.TextIOWrapper: hash_file})
def process_uploaded_file(file):
    """Process an open text file with custom hashing."""
    return pd.read_csv(file)
```

#### Cache Management

```python
import streamlit as st

@st.cache_data
def expensive_function(param):
    return compute_result(param)  # compute_result defined elsewhere

# Clear one function's cache manually
if st.button("Clear Cache"):
    expensive_function.clear()
    st.success("Cache cleared!")

# Clear all caches
if st.button("Clear All Caches"):
    st.cache_data.clear()
    st.cache_resource.clear()
    st.success("All caches cleared!")

# Conditional cache clearing: force a recomputation on demand
if st.checkbox("Force Refresh"):
    expensive_function.clear()
result = expensive_function(user_input)
```

#### Performance Monitoring

```python
import time

import streamlit as st

# Monitor computation time. st commands inside a cached function are
# captured and replayed on cache hits, so the metric shows the timing
# of the original (cache-miss) run.
@st.cache_data
def monitored_function(data):
    start_time = time.time()
    result = expensive_computation(data)  # expensive_computation defined elsewhere
    elapsed = time.time() - start_time
    st.sidebar.metric("Computation Time", f"{elapsed:.2f}s")
    return result

# Cache hit/miss tracking. Counters live in session state so they survive
# script reruns; a plain module-level dict would reset on every rerun.
if "cache_stats" not in st.session_state:
    st.session_state.cache_stats = {"calls": 0, "misses": 0}
cache_stats = st.session_state.cache_stats

@st.cache_data
def tracked_function(param):
    cache_stats["misses"] += 1  # Body only runs on a cache miss
    return compute_result(param)

cache_stats["calls"] += 1
result = tracked_function(user_input)

# Display cache statistics (hits = calls - misses)
col1, col2 = st.sidebar.columns(2)
col1.metric("Cache Hits", cache_stats["calls"] - cache_stats["misses"])
col2.metric("Cache Misses", cache_stats["misses"])
```

### Best Practices

#### When to Use Each Cache Type

**Use `@st.cache_data` for:**
- Data loading from files, APIs, or databases
- Data transformations and computations
- Serializable objects (DataFrames, lists, dicts, numbers, strings)
- Results that can be safely shared across users, since each caller gets its own copy

**Use `@st.cache_resource` for:**
- Database connections and connection pools
- ML models and trained algorithms
- File handles and open resources
- Objects holding locks or threads
- Non-serializable or stateful objects, where a single instance is shared across all sessions (see the sketch below)
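
A minimal sketch of the sharing difference noted above (`get_df` and `get_store` are hypothetical):

```python
import pandas as pd
import streamlit as st

@st.cache_data
def get_df():
    return pd.DataFrame({"x": [1, 2, 3]})

@st.cache_resource
def get_store():
    return {"x": [1, 2, 3]}

df = get_df()
df["x"] = 0       # Safe: cache_data hands each caller a fresh copy

store = get_store()
store["x"] = 0    # Mutates the one shared instance seen by every session
```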

#### Cache Configuration Guidelines

```python
import pandas as pd
import streamlit as st

# For frequently accessed, stable data
@st.cache_data(persist="disk")
def load_reference_data():
    return pd.read_csv("reference.csv")

# For near-real-time data, with a TTL matched to required freshness
@st.cache_data(ttl=60)  # 1 minute
def get_live_metrics():
    return fetch_current_metrics()  # fetch_current_metrics defined elsewhere

# For user-specific data with size limits
@st.cache_data(max_entries=1000)
def get_user_analysis(user_id, analysis_type):
    return perform_analysis(user_id, analysis_type)  # perform_analysis defined elsewhere

# For expensive resources with health validation
@st.cache_resource(validate=lambda service: service.is_healthy())
def get_ml_service():
    return MLService()  # MLService defined elsewhere
```
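
Putting the pieces together, a small end-to-end sketch (the `app.db` file and `items` table are illustrative):

```python
import sqlite3

import pandas as pd
import streamlit as st

@st.cache_resource
def get_connection():
    """One shared connection for all sessions (non-serializable)."""
    return sqlite3.connect("app.db", check_same_thread=False)

@st.cache_data(ttl=600)
def load_items(table):
    """Query results are serializable, so cache_data applies (10-minute TTL)."""
    return pd.read_sql_query(f"SELECT * FROM {table}", get_connection())

df = load_items("items")
st.dataframe(df)
if st.button("Refresh now"):
    load_items.clear()  # Drop the cached query results and rerun the script
    st.rerun()
```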