0
# Analytics Operations
1
2
Execute Analytics queries for complex data analysis and reporting. The Analytics service supports large-scale analytical workloads, integrates with external data sources, and provides advanced SQL++ analytics capabilities.
3
4
## Capabilities
5
6
### Analytics Query Execution
7
8
Execute analytics queries with various options and consistency levels.
9
10
```python { .api }
11
class Cluster:
12
def analytics_query(self, statement: str, options: AnalyticsOptions = None) -> AnalyticsResult:
13
"""
14
Execute Analytics query.
15
16
Args:
17
statement (str): Analytics query statement
18
options (AnalyticsOptions, optional): Analytics execution options
19
20
Returns:
21
AnalyticsResult: Analytics results iterator
22
23
Raises:
24
AnalyticsException: If analytics execution fails
25
TimeoutException: If query times out
26
"""
27
28
class AnalyticsOptions:
29
def __init__(self, timeout: timedelta = None,
30
scan_consistency: AnalyticsScanConsistency = None,
31
client_context_id: str = None,
32
priority: bool = False,
33
readonly: bool = None,
34
raw: Dict[str, Any] = None,
35
**kwargs):
36
"""
37
Analytics query execution options.
38
39
Args:
40
timeout (timedelta, optional): Query timeout
41
scan_consistency (AnalyticsScanConsistency, optional): Consistency level
42
client_context_id (str, optional): Client context identifier
43
priority (bool): High priority query flag
44
readonly (bool, optional): Read-only query flag
45
raw (Dict[str, Any], optional): Raw analytics options
46
**kwargs: Named parameters for parameterized queries
47
"""
48
49
def named_parameters(self, **params) -> AnalyticsOptions:
50
"""
51
Set named parameters for query.
52
53
Args:
54
**params: Named parameter values
55
56
Returns:
57
AnalyticsOptions: Options with parameters set
58
"""
59
60
def positional_parameters(self, *params) -> AnalyticsOptions:
61
"""
62
Set positional parameters for query.
63
64
Args:
65
*params: Positional parameter values
66
67
Returns:
68
AnalyticsOptions: Options with parameters set
69
"""
70
```
71
72
### Analytics Results and Metadata
73
74
Access analytics results and execution metadata.
75
76
```python { .api }
77
class AnalyticsResult:
78
def __iter__(self) -> Iterator[dict]:
79
"""Iterate over analytics result rows."""
80
81
def metadata(self) -> AnalyticsMetaData:
82
"""Get analytics execution metadata."""
83
84
def rows(self) -> List[dict]:
85
"""Get all result rows as list."""
86
87
class AnalyticsMetaData:
88
@property
89
def request_id(self) -> str:
90
"""Analytics request identifier."""
91
92
@property
93
def client_context_id(self) -> str:
94
"""Client context identifier."""
95
96
@property
97
def status(self) -> AnalyticsStatus:
98
"""Analytics execution status."""
99
100
@property
101
def signature(self) -> dict:
102
"""Analytics result signature."""
103
104
@property
105
def metrics(self) -> AnalyticsMetrics:
106
"""Analytics execution metrics."""
107
108
@property
109
def warnings(self) -> List[AnalyticsWarning]:
110
"""Analytics execution warnings."""
111
112
class AnalyticsMetrics:
113
@property
114
def elapsed_time(self) -> timedelta:
115
"""Total elapsed time for the request, from when it was received until the results were returned."""
116
117
@property
118
def execution_time(self) -> timedelta:
119
"""Time spent executing the query itself, from the start of execution until the results were returned."""
120
121
@property
122
def result_count(self) -> int:
123
"""Number of result rows."""
124
125
@property
126
def result_size(self) -> int:
127
"""Size of results in bytes."""
128
129
@property
130
def processed_objects(self) -> int:
131
"""Number of objects processed."""
132
133
@property
134
def error_count(self) -> int:
135
"""Number of errors encountered."""
136
137
@property
138
def warning_count(self) -> int:
139
"""Number of warnings generated."""
140
141
class AnalyticsWarning:
142
@property
143
def code(self) -> int:
144
"""Warning code."""
145
146
@property
147
def message(self) -> str:
148
"""Warning message."""
149
150
class AnalyticsStatus:
151
RUNNING = "running"
152
SUCCESS = "success"
153
ERRORS = "errors"
154
COMPLETED = "completed"
155
STOPPED = "stopped"
156
TIMEOUT = "timeout"
157
CLOSED = "closed"
158
FATAL = "fatal"
159
160
class AnalyticsScanConsistency:
161
NOT_BOUNDED = "not_bounded" # Fastest, may return stale data
162
REQUEST_PLUS = "request_plus" # Waits until mutations made before the request are ingested; slower, but not stale
163
```
164
165
### Analytics Index Management
166
167
Manage analytics dataverses, datasets, indexes, and links (including links to external data sources).
168
169
```python { .api }
170
class AnalyticsIndexManager:
171
def create_dataverse(self, dataverse_name: str, options: CreateAnalyticsDataverseOptions = None) -> None:
172
"""
173
Create analytics dataverse.
174
175
Args:
176
dataverse_name (str): Dataverse name
177
options (CreateAnalyticsDataverseOptions, optional): Creation options
178
179
Raises:
180
DataverseExistsException: If dataverse already exists
181
"""
182
183
def drop_dataverse(self, dataverse_name: str, options: DropAnalyticsDataverseOptions = None) -> None:
184
"""
185
Drop analytics dataverse.
186
187
Args:
188
dataverse_name (str): Dataverse name
189
options (DropAnalyticsDataverseOptions, optional): Drop options
190
191
Raises:
192
DataverseNotFoundException: If dataverse doesn't exist
193
"""
194
195
def create_dataset(self, dataset_name: str, bucket_name: str, options: CreateAnalyticsDatasetOptions = None) -> None:
196
"""
197
Create analytics dataset.
198
199
Args:
200
dataset_name (str): Dataset name
201
bucket_name (str): Source bucket name
202
options (CreateAnalyticsDatasetOptions, optional): Creation options
203
204
Raises:
205
DatasetExistsException: If dataset already exists
206
"""
207
208
def drop_dataset(self, dataset_name: str, options: DropAnalyticsDatasetOptions = None) -> None:
209
"""
210
Drop analytics dataset.
211
212
Args:
213
dataset_name (str): Dataset name
214
options (DropAnalyticsDatasetOptions, optional): Drop options
215
216
Raises:
217
DatasetNotFoundException: If dataset doesn't exist
218
"""
219
220
def create_index(self, index_name: str, dataset_name: str, fields: Dict[str, AnalyticsDataType], options: CreateAnalyticsIndexOptions = None) -> None:
221
"""
222
Create analytics index.
223
224
Args:
225
index_name (str): Index name
226
dataset_name (str): Dataset name
227
fields (Dict[str, AnalyticsDataType]): Index fields and types
228
options (CreateAnalyticsIndexOptions, optional): Creation options
229
230
Raises:
231
IndexExistsException: If index already exists
232
"""
233
234
def drop_index(self, index_name: str, dataset_name: str, options: DropAnalyticsIndexOptions = None) -> None:
235
"""
236
Drop analytics index.
237
238
Args:
239
index_name (str): Index name
240
dataset_name (str): Dataset name
241
options (DropAnalyticsIndexOptions, optional): Drop options
242
243
Raises:
244
IndexNotFoundException: If index doesn't exist
245
"""
246
247
def get_all_datasets(self, options: GetAllAnalyticsDatasetsOptions = None) -> List[AnalyticsDataset]:
248
"""
249
Get all analytics datasets.
250
251
Args:
252
options (GetAllAnalyticsDatasetsOptions, optional): Retrieval options
253
254
Returns:
255
List[AnalyticsDataset]: All datasets
256
"""
257
258
def get_all_indexes(self, options: GetAllAnalyticsIndexesOptions = None) -> List[AnalyticsIndex]:
259
"""
260
Get all analytics indexes.
261
262
Args:
263
options (GetAllAnalyticsIndexesOptions, optional): Retrieval options
264
265
Returns:
266
List[AnalyticsIndex]: All indexes
267
"""
268
269
def connect_link(self, options: ConnectAnalyticsLinkOptions = None) -> None:
270
"""
271
Connect analytics link.
272
273
Args:
274
options (ConnectAnalyticsLinkOptions, optional): Connection options
275
"""
276
277
def disconnect_link(self, options: DisconnectAnalyticsLinkOptions = None) -> None:
278
"""
279
Disconnect analytics link.
280
281
Args:
282
options (DisconnectAnalyticsLinkOptions, optional): Disconnection options
283
"""
284
285
def create_link(self, link: AnalyticsLink, options: CreateAnalyticsLinkOptions = None) -> None:
286
"""
287
Create external analytics link.
288
289
Args:
290
link (AnalyticsLink): Link configuration
291
options (CreateAnalyticsLinkOptions, optional): Creation options
292
293
Raises:
294
LinkExistsException: If link already exists
295
"""
296
297
def replace_link(self, link: AnalyticsLink, options: ReplaceAnalyticsLinkOptions = None) -> None:
298
"""
299
Replace external analytics link.
300
301
Args:
302
link (AnalyticsLink): New link configuration
303
options (ReplaceAnalyticsLinkOptions, optional): Replace options
304
305
Raises:
306
LinkNotFoundException: If link doesn't exist
307
"""
308
309
def drop_link(self, link_name: str, dataverse_name: str, options: DropAnalyticsLinkOptions = None) -> None:
310
"""
311
Drop external analytics link.
312
313
Args:
314
link_name (str): Link name
315
dataverse_name (str): Dataverse name
316
options (DropAnalyticsLinkOptions, optional): Drop options
317
318
Raises:
319
LinkNotFoundException: If link doesn't exist
320
"""
321
322
def get_links(self, options: GetAnalyticsLinksOptions = None) -> List[AnalyticsLink]:
323
"""
324
Get all analytics links.
325
326
Args:
327
options (GetAnalyticsLinksOptions, optional): Retrieval options
328
329
Returns:
330
List[AnalyticsLink]: All analytics links
331
"""
332
```
333
334
## Analytics Schema Types
335
336
```python { .api }
337
class AnalyticsDataset:
338
@property
339
def name(self) -> str:
340
"""Dataset name."""
341
342
@property
343
def dataverse_name(self) -> str:
344
"""Dataverse containing the dataset."""
345
346
@property
347
def link_name(self) -> str:
348
"""Link name for external datasets."""
349
350
@property
351
def bucket_name(self) -> str:
352
"""Source bucket name."""
353
354
class AnalyticsIndex:
355
@property
356
def name(self) -> str:
357
"""Index name."""
358
359
@property
360
def dataset_name(self) -> str:
361
"""Dataset containing the index."""
362
363
@property
364
def dataverse_name(self) -> str:
365
"""Dataverse containing the index."""
366
367
@property
368
def is_primary(self) -> bool:
369
"""Whether this is a primary index."""
370
371
class AnalyticsLink:
372
"""Base class for analytics links."""
373
374
class CouchbaseRemoteAnalyticsLink(AnalyticsLink):
375
def __init__(self, name: str, dataverse: str, hostname: str,
376
username: str, password: str = None,
377
encryption: AnalyticsEncryptionLevel = None):
378
"""
379
Remote Couchbase cluster link.
380
381
Args:
382
name (str): Link name
383
dataverse (str): Dataverse name
384
hostname (str): Remote cluster hostname
385
username (str): Remote cluster username
386
password (str, optional): Remote cluster password
387
encryption (AnalyticsEncryptionLevel, optional): Encryption level
388
"""
389
390
class S3ExternalAnalyticsLink(AnalyticsLink):
391
def __init__(self, name: str, dataverse: str, access_key_id: str,
392
secret_access_key: str, region: str,
393
service_endpoint: str = None):
394
"""
395
Amazon S3 external link.
396
397
Args:
398
name (str): Link name
399
dataverse (str): Dataverse name
400
access_key_id (str): AWS access key ID
401
secret_access_key (str): AWS secret access key
402
region (str): AWS region
403
service_endpoint (str, optional): Custom S3 endpoint
404
"""
405
406
class AzureBlobExternalAnalyticsLink(AnalyticsLink):
407
def __init__(self, name: str, dataverse: str, connection_string: str = None,
408
account_name: str = None, account_key: str = None,
409
shared_access_signature: str = None, blob_endpoint: str = None):
410
"""
411
Azure Blob Storage external link.
412
413
Args:
414
name (str): Link name
415
dataverse (str): Dataverse name
416
connection_string (str, optional): Azure connection string
417
account_name (str, optional): Azure account name
418
account_key (str, optional): Azure account key
419
shared_access_signature (str, optional): Azure SAS token
420
blob_endpoint (str, optional): Azure blob endpoint
421
"""
422
423
class AnalyticsDataType:
424
STRING = "string"
425
INT64 = "int64"
426
DOUBLE = "double"
427
BOOLEAN = "boolean"
428
DATETIME = "datetime"
429
UUID = "uuid"
430
431
class AnalyticsEncryptionLevel:
432
NONE = "none"
433
HALF = "half"
434
FULL = "full"
435
436
class AnalyticsLinkType:
437
COUCHBASE_REMOTE = "couchbase"
438
S3_EXTERNAL = "s3"
439
AZURE_EXTERNAL = "azureblob"
440
```
441
442
## Usage Examples
443
444
### Basic Analytics Queries
445
446
```python
447
from couchbase.analytics import AnalyticsScanConsistency
from couchbase.options import AnalyticsOptions
448
449
# Simple analytics query
450
query = """
451
SELECT country, COUNT(*) as hotel_count
452
FROM `travel-sample`
453
WHERE type = 'hotel'
454
GROUP BY country
455
ORDER BY hotel_count DESC
456
LIMIT 10
457
"""
458
459
result = cluster.analytics_query(query)
460
461
for row in result:
462
print(f"{row['country']}: {row['hotel_count']} hotels")
463
464
# Get metadata
465
metadata = result.metadata()
466
print(f"Query took: {metadata.metrics.elapsed_time}")
467
print(f"Processed: {metadata.metrics.processed_objects} objects")
468
```
469
470
### Parameterized Analytics Queries
471
472
```python
473
# Named parameters
474
query = """
475
SELECT name, country, reviews.ratings.Overall as rating
476
FROM `travel-sample`
477
WHERE type = $doc_type AND country = $country_name
478
AND reviews.ratings.Overall >= $min_rating
479
ORDER BY rating DESC
480
"""
481
482
options = AnalyticsOptions(
483
doc_type="hotel",
484
country_name="United States",
485
min_rating=4.0
486
)
487
488
result = cluster.analytics_query(query, options)
489
490
for row in result:
491
print(f"{row['name']} ({row['country']}): {row['rating']}")
492
493
# Positional parameters
494
query2 = """
495
SELECT AVG(reviews.ratings.Overall) as avg_rating
496
FROM `travel-sample`
497
WHERE type = ? AND country = ?
498
"""
499
500
options2 = AnalyticsOptions().positional_parameters("hotel", "France")
501
result2 = cluster.analytics_query(query2, options2)
502
503
for row in result2:
504
print(f"Average rating: {row['avg_rating']}")
505
```
506
507
### Complex Analytics Queries
508
509
```python
510
# Flatten nested reviews with UNNEST (one result row per hotel review)
511
query = """
512
SELECT h.name as hotel_name, h.city, r.content as review_text, r.ratings.Overall as rating
513
FROM `travel-sample` h
514
UNNEST h.reviews r
515
WHERE h.type = 'hotel'
516
AND h.country = 'United Kingdom'
517
AND r.ratings.Overall >= 4
518
ORDER BY r.ratings.Overall DESC, h.name
519
LIMIT 20
520
"""
521
522
result = cluster.analytics_query(query)
523
524
for row in result:
525
print(f"{row['hotel_name']} ({row['city']}): {row['rating']}")
526
print(f" Review: {row['review_text'][:100]}...")
527
528
# Ranking with window functions (top 3 hotels per country)
529
query2 = """
530
SELECT country, city, name, reviews.ratings.Overall as rating,
531
ROW_NUMBER() OVER (PARTITION BY country ORDER BY reviews.ratings.Overall DESC) as rank
532
FROM `travel-sample`
533
WHERE type = 'hotel' AND reviews.ratings.Overall IS NOT MISSING
534
QUALIFY rank <= 3
535
ORDER BY country, rank
536
"""
537
538
result2 = cluster.analytics_query(query2)
539
540
current_country = None
541
for row in result2:
542
if row['country'] != current_country:
543
current_country = row['country']
544
print(f"\nTop hotels in {current_country}:")
545
print(f" {row['rank']}. {row['name']} ({row['city']}): {row['rating']}")
546
```
547
548
### Analytics with Consistency
549
550
```python
551
# Perform document updates
552
doc = {"type": "hotel", "name": "Analytics Test Hotel", "country": "TestLand"}
553
mutation_result = collection.upsert("hotel::analytics_test", doc)
554
555
# Query with REQUEST_PLUS consistency so the upsert above is visible to Analytics
556
from couchbase.mutation_state import MutationState
557
558
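mutation_state = MutationState(mutation_result.mutation_token)  # NOTE: not passed to the query; AnalyticsOptions has no consistent_with option, so REQUEST_PLUS provides the consistency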
559
options = AnalyticsOptions(scan_consistency=AnalyticsScanConsistency.REQUEST_PLUS)
560
561
query = "SELECT * FROM `travel-sample` WHERE name = 'Analytics Test Hotel'"
562
result = cluster.analytics_query(query, options)
563
564
for row in result:
565
print(f"Found: {row['name']}")
566
```
567
568
### Analytics Index Management
569
570
```python
571
from couchbase.management.analytics import AnalyticsDataType, AnalyticsIndexManager, CreateAnalyticsDatasetOptions, CreateAnalyticsIndexOptions
572
573
analytics_mgr = cluster.analytics_indexes()
574
575
# Create dataverse
576
analytics_mgr.create_dataverse("travel_analytics")
577
578
# Create dataset
579
analytics_mgr.create_dataset("hotels", "travel-sample",
580
CreateAnalyticsDatasetOptions(dataverse_name="travel_analytics"))
581
582
# Create index
583
index_fields = {
584
"country": AnalyticsDataType.STRING,
585
"city": AnalyticsDataType.STRING,
586
"rating": AnalyticsDataType.DOUBLE
587
}
588
589
analytics_mgr.create_index("hotel_location_idx", "hotels", index_fields,
590
CreateAnalyticsIndexOptions(dataverse_name="travel_analytics"))
591
592
# List all datasets
593
datasets = analytics_mgr.get_all_datasets()
594
for dataset in datasets:
595
print(f"Dataset: {dataset.name} in {dataset.dataverse_name}")
596
597
# List all indexes
598
indexes = analytics_mgr.get_all_indexes()
599
for index in indexes:
600
print(f"Index: {index.name} on {index.dataset_name}")
601
```
602
603
### External Data Links
604
605
```python
606
from couchbase.management.analytics import S3ExternalAnalyticsLink, AzureBlobExternalAnalyticsLink
607
608
analytics_mgr = cluster.analytics_indexes()
609
610
# Create S3 external link
611
s3_link = S3ExternalAnalyticsLink(
612
name="s3_data_link",
613
dataverse="external_data",
614
access_key_id="AKIAIOSFODNN7EXAMPLE",
615
secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",
616
region="us-west-2"
617
)
618
619
analytics_mgr.create_link(s3_link)
620
621
# Create Azure Blob link
622
azure_link = AzureBlobExternalAnalyticsLink(
623
name="azure_data_link",
624
dataverse="external_data",
625
account_name="mystorageaccount",
626
account_key="myaccountkey"
627
)
628
629
analytics_mgr.create_link(azure_link)
630
631
# Query external data
632
external_query = """
633
SELECT *
634
FROM EXTERNAL `s3://my-bucket/data/*.json`
635
USING `s3_data_link`
636
LIMIT 10
637
"""
638
639
result = cluster.analytics_query(external_query)
640
for row in result:
641
print(row)
642
```
643
644
### Error Handling
645
646
```python
647
from couchbase.exceptions import AnalyticsException, TimeoutException
648
649
try:
650
query = "SELECT * FROM `nonexistent-bucket`"
651
result = cluster.analytics_query(query)
652
for row in result:
653
print(row)
654
except AnalyticsException as e:
655
print(f"Analytics query failed: {e}")
656
if hasattr(e, 'context'):
657
print(f"Query: {e.context.statement}")
658
print(f"Error details: {e.context.errors}")
659
except TimeoutException:
660
print("Analytics query timed out")
661
662
# Handle warnings
663
try:
664
query = "SELECT * FROM `travel-sample` WHERE deprecated_field IS NOT MISSING"
665
result = cluster.analytics_query(query)
666
667
for row in result:
667
print(row)
668
669
metadata = result.metadata()  # read metadata only after all rows have been iterated
670
if metadata.warnings:
671
for warning in metadata.warnings:
672
print(f"Warning {warning.code}: {warning.message}")
674
except AnalyticsException as e:
675
print(f"Analytics error: {e}")
676
```
677
678
### Performance Monitoring
679
680
```python
681
# Query with detailed metrics
682
options = AnalyticsOptions(
683
client_context_id="performance_test_001",
684
priority=True # High priority query
685
)
686
687
query = """
688
SELECT country, AVG(reviews.ratings.Overall) as avg_rating,
689
COUNT(*) as hotel_count
690
FROM `travel-sample`
691
WHERE type = 'hotel' AND reviews.ratings.Overall IS NOT MISSING
692
GROUP BY country
693
HAVING COUNT(*) >= 10
694
ORDER BY avg_rating DESC
695
"""
696
697
result = cluster.analytics_query(query, options)
698
699
# Process results
700
results_list = []
701
for row in result:
702
results_list.append(row)
703
704
# Analyze performance
705
metadata = result.metadata()
706
metrics = metadata.metrics
707
708
print(f"Analytics Performance Report:")
709
print(f" Request ID: {metadata.request_id}")
710
print(f" Total Time: {metrics.elapsed_time}")
711
print(f" Execution Time: {metrics.execution_time}")
712
print(f" Objects Processed: {metrics.processed_objects}")
713
print(f" Result Count: {metrics.result_count}")
714
print(f" Result Size: {metrics.result_size} bytes")
715
print(f" Warnings: {metrics.warning_count}")
716
print(f" Errors: {metrics.error_count}")
717
718
print(f"\nTop 5 Countries by Hotel Rating:")
719
for i, row in enumerate(results_list[:5]):
720
print(f" {i+1}. {row['country']}: {row['avg_rating']:.2f} ({row['hotel_count']} hotels)")
721
```