pypi-streamlit

Description: A faster way to build and share data apps
Author: tessl

How to use:

```
npx @tessl/cli registry install tessl/pypi-streamlit@1.50.0
```

docs/caching-performance.md

# Caching and Performance

Caching decorators and performance-optimization tools for efficient data processing and resource management. Streamlit's caching system lets applications avoid repeating expensive computations and resource loading on every script rerun.
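
As a quick illustration (a minimal sketch; `slow_square` stands in for any expensive call), a cached function's body runs once per unique set of arguments, and later reruns reuse the stored result:

```python
import time

import streamlit as st

@st.cache_data
def slow_square(n):
    """Pretend-expensive computation."""
    time.sleep(2)  # Stands in for a slow query or computation
    return n * n

st.write(slow_square(4))  # Body runs once; subsequent reruns return instantly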

## Capabilities

### Data Caching

Cache expensive data computations whose results can be serialized and shared across sessions.

```python { .api }
def cache_data(func=None, *, ttl=None, max_entries=None, show_spinner=True, persist=None, experimental_allow_widgets=False, hash_funcs=None):
    """
    Decorator to cache functions that return serializable data.

    Args:
        func (callable, optional): Function to cache (when used as a bare decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool or str): Whether to show a spinner during computation; a string sets the spinner text
        persist (str, optional): Persistence mode ("disk" for persistent storage)
        experimental_allow_widgets (bool): Allow widgets in cached functions (deprecated)
        hash_funcs (dict, optional): Custom hash functions for parameter types

    Returns:
        callable: Decorated function with caching capability
    """
```

Example usage:
```python
import pandas as pd
import requests
import streamlit as st

@st.cache_data
def load_data(file_path):
    """Load and process data file."""
    df = pd.read_csv(file_path)
    return df.groupby('category').sum()

# Cache with TTL (expires after 1 hour)
@st.cache_data(ttl=3600)
def fetch_api_data(endpoint, params):
    """Fetch data from API with 1-hour cache."""
    response = requests.get(endpoint, params=params)
    return response.json()

# Cache with persistence (survives app restarts)
@st.cache_data(persist="disk")
def expensive_computation(data, algorithm):
    """Expensive ML computation with disk persistence."""
    model = train_model(data, algorithm)  # train_model defined elsewhere
    return model.predictions

# Cache with a custom spinner message
@st.cache_data(show_spinner="Fetching user data...")
def get_user_data(user_id):
    """Get user data with a descriptive loading spinner."""
    return database.fetch_user(user_id)  # database defined elsewhere

# Cache with max entries limit (oldest entries evicted when full)
@st.cache_data(max_entries=100)
def process_query(query, filters):
    """Process search query with a bounded cache."""
    return search_engine.process(query, filters)  # search_engine defined elsewhere
```

### Resource Caching

Cache global resources such as database connections, ML models, and other objects that cannot be serialized.

```python { .api }
def cache_resource(func=None, *, ttl=None, max_entries=None, show_spinner=True, validate=None, hash_funcs=None):
    """
    Decorator to cache functions that return non-serializable resources.

    Args:
        func (callable, optional): Function to cache (when used as a bare decorator)
        ttl (float, optional): Time-to-live in seconds
        max_entries (int, optional): Maximum number of cached entries
        show_spinner (bool or str): Whether to show a spinner during computation; a string sets the spinner text
        validate (callable, optional): Function that receives the cached resource and returns True if it is still valid
        hash_funcs (dict, optional): Custom hash functions for parameter types

    Returns:
        callable: Decorated function with resource caching capability
    """
```

Example usage:
```python
import sqlite3

import streamlit as st

@st.cache_resource
def get_database_connection():
    """Create database connection (shared across all sessions)."""
    return sqlite3.connect("app.db", check_same_thread=False)

@st.cache_resource
def load_ml_model(model_path):
    """Load ML model (expensive, non-serializable)."""
    import tensorflow as tf
    return tf.keras.models.load_model(model_path)

# Resource with TTL (model refreshes daily)
@st.cache_resource(ttl=86400)
def get_trained_model(training_data_hash):
    """Load or train model with daily refresh."""
    return train_model(training_data_hash)  # train_model defined elsewhere

# Resource with validation (failed validation triggers recreation)
@st.cache_resource(validate=lambda client: client.is_connected())
def get_api_client(api_key):
    """Get API client with connection validation."""
    return APIClient(api_key)  # APIClient defined elsewhere

# Limited resource cache
@st.cache_resource(max_entries=5)
def create_processor(config):
    """Create data processor (at most 5 configurations cached)."""
    return DataProcessor(config)  # DataProcessor defined elsewhere
```

### Legacy Caching (Deprecated)

The original caching decorator, now deprecated in favor of `st.cache_data` and `st.cache_resource`.

```python { .api }
def cache(func=None, persist=False, allow_output_mutation=False, show_spinner=True, suppress_st_warning=False, hash_funcs=None, max_entries=None, ttl=None):
    """
    Legacy caching decorator (deprecated).

    Args:
        func (callable, optional): Function to cache
        persist (bool): Whether to persist cache to disk
        allow_output_mutation (bool): Allow mutation of cached return values
        show_spinner (bool): Whether to show spinner during computation
        suppress_st_warning (bool): Suppress Streamlit warnings
        hash_funcs (dict, optional): Custom hash functions
        max_entries (int, optional): Maximum number of cached entries
        ttl (float, optional): Time-to-live in seconds

    Returns:
        callable: Decorated function with caching

    Note:
        Deprecated. Use st.cache_data or st.cache_resource instead.
    """
```
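
For reference, a minimal migration sketch (assuming a Streamlit version that still ships `st.cache`; `load_table` and the SQLAlchemy engine are illustrative):

```python
import pandas as pd
import streamlit as st

# Before (deprecated): emits a deprecation warning on modern Streamlit
@st.cache(persist=True, show_spinner=False)
def load_table(path):
    return pd.read_csv(path)

# After: serializable results move to cache_data...
@st.cache_data(persist="disk", show_spinner=False)
def load_table_v2(path):
    return pd.read_csv(path)

# ...and non-serializable objects (the old allow_output_mutation=True
# use case) move to cache_resource
@st.cache_resource
def get_engine(url):
    import sqlalchemy
    return sqlalchemy.create_engine(url)
```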

### Performance Optimization Patterns

#### Data Loading Optimization

```python
import pandas as pd
import streamlit as st

# Cache expensive data loading
@st.cache_data
def load_large_dataset(data_source):
    """Load and preprocess large dataset."""
    df = pd.read_parquet(data_source)  # Columnar format loads faster than CSV
    df = df.fillna(0)  # Preprocessing
    return df

# Cache with parameters: each distinct argument combination is cached separately
@st.cache_data
def filter_data(df, category, date_range):
    """Filter dataset based on parameters."""
    mask = (
        (df['category'] == category)
        & (df['date'].between(date_range[0], date_range[1]))
    )
    return df[mask]

# Usage with cached functions
data = load_large_dataset("data.parquet")
filtered_data = filter_data(data, selected_category, date_range)
```

#### Model and Resource Management

```python
import joblib
import streamlit as st

# Cache ML models
@st.cache_resource
def load_prediction_model():
    """Load trained model for predictions."""
    return joblib.load("model.pkl")

@st.cache_resource
def get_feature_encoder():
    """Load feature preprocessing pipeline."""
    return joblib.load("encoder.pkl")

# Cache database connections
@st.cache_resource
def init_database():
    """Initialize database connection pool."""
    return ConnectionPool(  # ConnectionPool defined elsewhere
        host="localhost",
        database="myapp",
        max_connections=10,
    )

# Usage pattern: these calls are cheap after the first run
model = load_prediction_model()
encoder = get_feature_encoder()
db = init_database()

# Now use the cached resources
features = encoder.transform(user_input)
prediction = model.predict(features)
```

#### API and External Service Caching

```python
import requests
import streamlit as st

# Cache API calls with TTL
@st.cache_data(ttl=300)  # 5 minutes
def fetch_stock_prices(symbols):
    """Fetch current stock prices (cached for 5 minutes)."""
    api_key = st.secrets["stock_api_key"]
    response = requests.get(
        "https://api.stocks.com/prices",
        params={"symbols": ",".join(symbols), "key": api_key},
    )
    return response.json()

@st.cache_data(ttl=3600)  # 1 hour
def get_weather_data(location):
    """Fetch weather data (cached for 1 hour)."""
    api_key = st.secrets["weather_api_key"]
    response = requests.get(
        "https://api.weather.com/current",
        params={"location": location, "key": api_key},
    )
    return response.json()

# Usage with error handling
try:
    weather = get_weather_data(user_location)
    st.metric("Temperature", f"{weather['temp']}°F")
except Exception as e:
    st.error(f"Could not fetch weather data: {e}")
```

#### Custom Hash Functions

```python
import io
import os

import pandas as pd
import streamlit as st

# Custom hash for complex objects
def hash_dataframe(df):
    """Custom hash function for pandas DataFrames."""
    return hash(pd.util.hash_pandas_object(df).sum())

@st.cache_data(hash_funcs={pd.DataFrame: hash_dataframe})
def process_dataframe(df, operations):
    """Process DataFrame with custom hashing."""
    result = df.copy()
    for op in operations:
        result = apply_operation(result, op)  # apply_operation defined elsewhere
    return result

# Custom hash for file objects
def hash_file(file_obj):
    """Hash a file by name and modification time, falling back to content."""
    if hasattr(file_obj, 'name'):
        return hash((file_obj.name, os.path.getmtime(file_obj.name)))
    return hash(file_obj.read())

# io.TextIOWrapper is the type returned by open() in text mode
@st.cache_data(hash_funcs={io.TextIOWrapper: hash_file})
def process_uploaded_file(file):
    """Process an open text file with custom hashing."""
    return pd.read_csv(file)
```

#### Cache Management

```python
import streamlit as st

@st.cache_data
def expensive_function(param):
    return compute_result(param)  # compute_result defined elsewhere

# Clear one function's cache manually
if st.button("Clear Cache"):
    expensive_function.clear()
    st.success("Cache cleared!")

# Clear all caches
if st.button("Clear All Caches"):
    st.cache_data.clear()
    st.cache_resource.clear()
    st.success("All caches cleared!")

# Conditional cache clearing: force a recomputation on demand
if st.checkbox("Force Refresh"):
    expensive_function.clear()
result = expensive_function(user_input)
```

#### Performance Monitoring

```python
import time

import streamlit as st

# Monitor computation time. st commands inside a cached function are
# captured and replayed on cache hits, so the metric shows the timing
# of the original (cache-miss) run.
@st.cache_data
def monitored_function(data):
    start_time = time.time()
    result = expensive_computation(data)  # expensive_computation defined elsewhere
    elapsed = time.time() - start_time
    st.sidebar.metric("Computation Time", f"{elapsed:.2f}s")
    return result

# Cache hit/miss tracking. Counters live in session state so they survive
# script reruns; a plain module-level dict would reset on every rerun.
if "cache_stats" not in st.session_state:
    st.session_state.cache_stats = {"calls": 0, "misses": 0}
cache_stats = st.session_state.cache_stats

@st.cache_data
def tracked_function(param):
    cache_stats["misses"] += 1  # Body only runs on a cache miss
    return compute_result(param)

cache_stats["calls"] += 1
result = tracked_function(user_input)

# Display cache statistics (hits = calls - misses)
col1, col2 = st.sidebar.columns(2)
col1.metric("Cache Hits", cache_stats["calls"] - cache_stats["misses"])
col2.metric("Cache Misses", cache_stats["misses"])
```

### Best Practices

#### When to Use Each Cache Type

**Use `@st.cache_data` for:**
- Data loading from files, APIs, or databases
- Data transformations and computations
- Serializable objects (DataFrames, lists, dicts, numbers, strings)
- Results that can be safely shared across users, since each caller gets its own copy

**Use `@st.cache_resource` for:**
- Database connections and connection pools
- ML models and trained algorithms
- File handles and open resources
- Objects holding locks or threads
- Non-serializable or stateful objects, where a single instance is shared across all sessions (see the sketch below)
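
A minimal sketch of the sharing difference noted above (`get_df` and `get_store` are hypothetical):

```python
import pandas as pd
import streamlit as st

@st.cache_data
def get_df():
    return pd.DataFrame({"x": [1, 2, 3]})

@st.cache_resource
def get_store():
    return {"x": [1, 2, 3]}

df = get_df()
df["x"] = 0       # Safe: cache_data hands each caller a fresh copy

store = get_store()
store["x"] = 0    # Mutates the one shared instance seen by every session
```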

#### Cache Configuration Guidelines

```python
import pandas as pd
import streamlit as st

# For frequently accessed, stable data
@st.cache_data(persist="disk")
def load_reference_data():
    return pd.read_csv("reference.csv")

# For near-real-time data, with a TTL matched to required freshness
@st.cache_data(ttl=60)  # 1 minute
def get_live_metrics():
    return fetch_current_metrics()  # fetch_current_metrics defined elsewhere

# For user-specific data with size limits
@st.cache_data(max_entries=1000)
def get_user_analysis(user_id, analysis_type):
    return perform_analysis(user_id, analysis_type)  # perform_analysis defined elsewhere

# For expensive resources with health validation
@st.cache_resource(validate=lambda service: service.is_healthy())
def get_ml_service():
    return MLService()  # MLService defined elsewhere
```
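
Putting the pieces together, a small end-to-end sketch (the `app.db` file and `items` table are illustrative):

```python
import sqlite3

import pandas as pd
import streamlit as st

@st.cache_resource
def get_connection():
    """One shared connection for all sessions (non-serializable)."""
    return sqlite3.connect("app.db", check_same_thread=False)

@st.cache_data(ttl=600)
def load_items(table):
    """Query results are serializable, so cache_data applies (10-minute TTL)."""
    return pd.read_sql_query(f"SELECT * FROM {table}", get_connection())

df = load_items("items")
st.dataframe(df)
if st.button("Refresh now"):
    load_items.clear()  # Drop the cached query results and rerun the script
    st.rerun()
```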