0
# Adapters
1
2
Extensible plugin system for connecting to various data sources through SQL queries. Adapters translate SQL operations to source-specific requests, enabling unified querying across APIs, files, databases, and in-memory data.
3
4
## Capabilities
5
6
### Base Adapter Framework
7
8
Foundation classes for implementing custom data source adapters with standardized interface and lifecycle management.
9
10
```python { .api }
11
class Adapter:
12
"""Base adapter class for data sources."""
13
14
@classmethod
15
def supports(cls, uri, fast=True, **kwargs):
16
"""
17
Check if adapter supports the given URI.
18
19
Parameters:
20
- uri (str): Data source URI to check
21
- fast (bool): If True, avoid network requests for quick check
22
- **kwargs: Adapter-specific configuration
23
24
Returns:
25
bool or None: True if supported, False if not, None if uncertain
26
"""
27
28
@classmethod
29
def parse_uri(cls, uri):
30
"""
31
Parse URI into adapter initialization arguments.
32
33
Parameters:
34
- uri (str): Data source URI to parse
35
36
Returns:
37
tuple: Arguments for adapter initialization
38
"""
39
40
def get_columns(self):
41
"""
42
Get column definitions for the data source.
43
44
Returns:
45
dict: Column name to Field mapping
46
"""
47
48
def get_data(self, bounds=None, order=None, **kwargs):
49
"""
50
Get data from the source with optional filtering and ordering.
51
52
Parameters:
53
- bounds (dict, optional): Column filters
54
- order (list, optional): Ordering specifications
55
- **kwargs: Additional query parameters
56
57
Returns:
58
Iterator[dict]: Stream of row dictionaries
59
"""
60
61
def get_cost(self, filtered_columns, order):
62
"""
63
Estimate query cost for optimization.
64
65
Parameters:
66
- filtered_columns (list): Columns with filters applied
67
- order (list): Ordering specifications
68
69
Returns:
70
int: Estimated query cost
71
"""
72
```
73
74
### File Adapters
75
76
Adapters for file-based data sources supporting local and remote files.
77
78
```python { .api }
79
class CSVFile:
80
"""CSV file adapter."""
81
82
def __init__(self, filepath, **kwargs):
83
"""
84
Initialize CSV adapter.
85
86
Parameters:
87
- filepath (str): Path to CSV file (local or URL)
88
- encoding (str, optional): File encoding
89
- delimiter (str, optional): CSV delimiter
90
- quotechar (str, optional): Quote character
91
- has_header (bool, optional): Whether file has header row
92
"""
93
```
94
95
Usage example:
96
97
```python
98
# Query local CSV file
99
cursor.execute("SELECT * FROM './data/sales.csv' WHERE amount > 1000")
100
101
# Query remote CSV file
102
cursor.execute("SELECT * FROM 'https://example.com/data.csv'")
103
```
104
105
### API Adapters
106
107
Adapters for various web APIs and services with authentication and caching support.
108
109
```python { .api }
110
class GSheetsAPI:
111
"""Google Sheets API adapter."""
112
113
def __init__(self, url, service_account_file=None, **kwargs):
114
"""
115
Initialize Google Sheets adapter.
116
117
Parameters:
118
- url (str): Google Sheets URL
119
- service_account_file (str, optional): Path to service account JSON
120
- subject (str, optional): Email for impersonation
121
"""
122
123
class GitHubAPI:
124
"""GitHub API adapter."""
125
126
def __init__(self, url, token=None, **kwargs):
127
"""
128
Initialize GitHub API adapter.
129
130
Parameters:
131
- url (str): GitHub API URL
132
- token (str, optional): GitHub access token
133
"""
134
135
class GenericJSONAPI:
136
"""Generic JSON API adapter."""
137
138
def __init__(self, url, path=None, **kwargs):
139
"""
140
Initialize generic JSON API adapter.
141
142
Parameters:
143
- url (str): JSON API endpoint URL
144
- path (str, optional): JSONPath expression for data extraction
145
- headers (dict, optional): HTTP headers
146
"""
147
148
class HTMLTableAPI:
149
"""HTML table scraper adapter."""
150
151
def __init__(self, url, table_index=0, **kwargs):
152
"""
153
Initialize HTML table adapter.
154
155
Parameters:
156
- url (str): URL containing HTML tables
157
- table_index (int): Index of table to extract (default: 0)
158
"""
159
160
class S3SelectAPI:
161
"""AWS S3 Select adapter."""
162
163
def __init__(self, url, aws_access_key_id=None, **kwargs):
164
"""
165
Initialize S3 Select adapter.
166
167
Parameters:
168
- url (str): S3 object URL (s3://bucket/key)
169
- aws_access_key_id (str, optional): AWS access key
170
- aws_secret_access_key (str, optional): AWS secret key
171
"""
172
```
173
174
### Memory Adapters
175
176
Adapters for in-memory data sources and virtual tables.
177
178
```python { .api }
179
class PandasMemory:
180
"""Pandas DataFrame adapter."""
181
182
def __init__(self, df, **kwargs):
183
"""
184
Initialize pandas adapter.
185
186
Parameters:
187
- df (pandas.DataFrame): DataFrame to query
188
- table_name (str, optional): Virtual table name
189
"""
190
191
class VirtualMemory:
192
"""Virtual/generated data adapter."""
193
194
def __init__(self, columns, rows=None, **kwargs):
195
"""
196
Initialize virtual memory adapter.
197
198
Parameters:
199
- columns (dict): Column definitions
200
- rows (list, optional): Static row data
201
- generator (callable, optional): Dynamic row generator
202
"""
203
204
class HolidaysMemory:
205
"""Holiday data adapter."""
206
207
def __init__(self, country='US', years=None, **kwargs):
208
"""
209
Initialize holidays adapter.
210
211
Parameters:
212
- country (str): Country code for holidays
213
- years (list, optional): Years to include
214
"""
215
```
216
217
### Adapter Registry
218
219
System for loading and managing available adapters dynamically.
220
221
```python { .api }
222
class AdapterLoader:
223
"""Loads and manages data source adapters."""
224
225
def load_adapters(self):
226
"""
227
Load all available adapters from entry points.
228
229
Returns:
230
list: List of adapter classes
231
"""
232
233
def find_adapter(self, uri, adapter_kwargs=None):
234
"""
235
Find appropriate adapter for URI.
236
237
Parameters:
238
- uri (str): Data source URI
239
- adapter_kwargs (dict, optional): Adapter configurations
240
241
Returns:
242
tuple: (adapter_class, args, kwargs)
243
"""
244
```
245
246
## Built-in Adapters
247
248
### API Data Sources
249
250
- **gsheetsapi** (`GSheetsAPI`): Google Sheets integration with authentication and caching
251
- **githubapi** (`GitHubAPI`): GitHub repositories, issues, pull requests, and user data
252
- **genericjsonapi** (`GenericJSONAPI`): Generic JSON-based REST APIs with configurable parsing
253
- **genericxmlapi** (`GenericXMLAPI`): XML-based APIs and RSS/Atom feeds
254
- **htmltableapi** (`HTMLTableAPI`): HTML table extraction from web pages with CSS selectors
255
- **datasetteapi** (`DatasetteAPI`): Datasette database publishing platform integration
256
- **dbtmetricflowapi** (`DbtMetricFlowAPI`): dbt MetricFlow semantic layer queries
257
- **presetapi** (`PresetAPI`): Apache Superset dashboard and chart data
258
- **presetworkspaceapi** (`PresetWorkspaceAPI`): Preset.io workspace integration
259
- **s3selectapi** (`S3SelectAPI`): AWS S3 object querying with S3 Select
260
- **socrataapi** (`SocrataAPI`): Socrata Open Data platform datasets
261
- **systemapi** (`SystemAPI`): System metrics, processes, and monitoring data
262
- **weatherapi** (`WeatherAPI`): Weather service APIs and meteorological data
263
264
### File Data Sources
265
266
- **csvfile** (`CSVFile`): CSV files from local filesystem or HTTP(S) URLs
267
268
### Memory Data Sources
269
270
- **pandasmemory** (`PandasMemory`): Pandas DataFrames as virtual SQL tables
271
- **holidaysmemory** (`HolidaysMemory`): Holiday and calendar data for various countries
272
- **virtualmemory** (`VirtualMemory`): Generated or computed data with custom logic
273
274
## Usage Examples
275
276
### Querying Google Sheets
277
278
```python
279
from shillelagh.backends.apsw.db import connect
280
281
# Configure Google Sheets access
282
adapter_kwargs = {
283
'gsheetsapi': {
284
'service_account_file': '/path/to/credentials.json',
285
'subject': 'user@example.com'
286
}
287
}
288
289
connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
290
cursor = connection.cursor()
291
292
# Query Google Sheet
293
sheet_url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgvE2upms/edit"
294
cursor.execute(f"""
295
SELECT Name, Major, "Number of Years"
296
FROM "{sheet_url}"
297
WHERE Major = 'English'
298
""")
299
300
for row in cursor.fetchall():
301
print(row)
302
```
303
304
### Custom Adapter Configuration
305
306
```python
307
from shillelagh.backends.apsw.db import connect
308
309
# Configure multiple adapters
310
adapter_kwargs = {
311
'githubapi': {'token': 'ghp_your_token_here'},
312
'genericjsonapi': {'headers': {'User-Agent': 'MyApp/1.0'}},
313
's3selectapi': {
314
'aws_access_key_id': 'your_key',
315
'aws_secret_access_key': 'your_secret'
316
}
317
}
318
319
connection = connect(":memory:", adapter_kwargs=adapter_kwargs)
320
cursor = connection.cursor()
321
322
# Query GitHub API
323
cursor.execute("""
324
SELECT name, stargazers_count, language
325
FROM "https://api.github.com/users/octocat/repos"
326
WHERE language = 'Python'
327
ORDER BY stargazers_count DESC
328
""")
329
```
330
331
### Using Pandas DataFrames
332
333
```python
334
import pandas as pd
335
from shillelagh.backends.apsw.db import connect
336
337
# Create DataFrame
338
df = pd.DataFrame({
339
'name': ['Alice', 'Bob', 'Charlie'],
340
'age': [25, 30, 35],
341
'city': ['NYC', 'LA', 'Chicago']
342
})
343
344
# Register DataFrame as virtual table
345
connection = connect(":memory:")
346
connection.execute("CREATE VIRTUAL TABLE people USING pandasmemory", df)
347
348
# Query DataFrame with SQL
349
cursor = connection.cursor()
350
cursor.execute("SELECT name, age FROM people WHERE age > 25 ORDER BY age")
351
results = cursor.fetchall()
352
```