or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

analytics-operations.md async-operations.md cluster-operations.md document-operations.md index.md management-operations.md n1ql-queries.md search-operations.md subdocument-operations.md view-operations.md

docs/analytics-operations.md

0

# Analytics Operations

1

2

Execute analytics queries for complex data analysis and reporting. The Analytics service supports large-scale analytical workloads, integration with external data sources, and advanced SQL++ analytics capabilities.

3

4

## Capabilities

5

6

### Analytics Query Execution

7

8

Execute analytics queries with various options and consistency levels.

9

10

```python { .api }

11

class Cluster:

12

def analytics_query(self, statement: str, options: AnalyticsOptions = None) -> AnalyticsResult:

13

"""

14

Execute Analytics query.

15

16

Args:

17

statement (str): Analytics query statement

18

options (AnalyticsOptions, optional): Analytics execution options

19

20

Returns:

21

AnalyticsResult: Analytics results iterator

22

23

Raises:

24

AnalyticsException: If analytics execution fails

25

TimeoutException: If query times out

26

"""

27

28

class AnalyticsOptions:

29

def __init__(self, timeout: timedelta = None,

30

scan_consistency: AnalyticsScanConsistency = None,

31

client_context_id: str = None,

32

priority: bool = False,

33

readonly: bool = None,

34

raw: Dict[str, Any] = None,

35

**kwargs):

36

"""

37

Analytics query execution options.

38

39

Args:

40

timeout (timedelta, optional): Query timeout

41

scan_consistency (AnalyticsScanConsistency, optional): Consistency level

42

client_context_id (str, optional): Client context identifier

43

priority (bool): High priority query flag

44

readonly (bool, optional): Read-only query flag

45

raw (Dict[str, Any], optional): Raw analytics options

46

**kwargs: Named parameters for parameterized queries

47

"""

48

49

def named_parameters(self, **params) -> AnalyticsOptions:

50

"""

51

Set named parameters for query.

52

53

Args:

54

**params: Named parameter values

55

56

Returns:

57

AnalyticsOptions: Options with parameters set

58

"""

59

60

def positional_parameters(self, *params) -> AnalyticsOptions:

61

"""

62

Set positional parameters for query.

63

64

Args:

65

*params: Positional parameter values

66

67

Returns:

68

AnalyticsOptions: Options with parameters set

69

"""

70

```

71

72

### Analytics Results and Metadata

73

74

Access analytics results and execution metadata.

75

76

```python { .api }

77

class AnalyticsResult:

78

def __iter__(self) -> Iterator[dict]:

79

"""Iterate over analytics result rows."""

80

81

def metadata(self) -> AnalyticsMetaData:

82

"""Get analytics execution metadata."""

83

84

def rows(self) -> List[dict]:

85

"""Get all result rows as list."""

86

87

class AnalyticsMetaData:

88

@property

89

def request_id(self) -> str:

90

"""Analytics request identifier."""

91

92

@property

93

def client_context_id(self) -> str:

94

"""Client context identifier."""

95

96

@property

97

def status(self) -> AnalyticsStatus:

98

"""Analytics execution status."""

99

100

@property

101

def signature(self) -> dict:

102

"""Analytics result signature."""

103

104

@property

105

def metrics(self) -> AnalyticsMetrics:

106

"""Analytics execution metrics."""

107

108

@property

109

def warnings(self) -> List[AnalyticsWarning]:

110

"""Analytics execution warnings."""

111

112

class AnalyticsMetrics:

113

@property

114

def elapsed_time(self) -> timedelta:

115

"""Total analytics execution time."""

116

117

@property

118

def execution_time(self) -> timedelta:

119

"""Analytics execution time."""

120

121

@property

122

def result_count(self) -> int:

123

"""Number of result rows."""

124

125

@property

126

def result_size(self) -> int:

127

"""Size of results in bytes."""

128

129

@property

130

def processed_objects(self) -> int:

131

"""Number of objects processed."""

132

133

@property

134

def error_count(self) -> int:

135

"""Number of errors encountered."""

136

137

@property

138

def warning_count(self) -> int:

139

"""Number of warnings generated."""

140

141

class AnalyticsWarning:

142

@property

143

def code(self) -> int:

144

"""Warning code."""

145

146

@property

147

def message(self) -> str:

148

"""Warning message."""

149

150

class AnalyticsStatus:

151

RUNNING = "running"

152

SUCCESS = "success"

153

ERRORS = "errors"

154

COMPLETED = "completed"

155

STOPPED = "stopped"

156

TIMEOUT = "timeout"

157

CLOSED = "closed"

158

FATAL = "fatal"

159

160

class AnalyticsScanConsistency:

161

NOT_BOUNDED = "not_bounded" # Fastest, may return stale data

162

REQUEST_PLUS = "request_plus" # Consistent with mutations

163

```

164

165

### Analytics Index Management

166

167

Manage analytics dataverses, datasets, indexes, and links, including links to remote Couchbase clusters and external data sources.

168

169

```python { .api }

170

class AnalyticsIndexManager:

171

def create_dataverse(self, dataverse_name: str, options: CreateAnalyticsDataverseOptions = None) -> None:

172

"""

173

Create analytics dataverse.

174

175

Args:

176

dataverse_name (str): Dataverse name

177

options (CreateAnalyticsDataverseOptions, optional): Creation options

178

179

Raises:

180

DataverseExistsException: If dataverse already exists

181

"""

182

183

def drop_dataverse(self, dataverse_name: str, options: DropAnalyticsDataverseOptions = None) -> None:

184

"""

185

Drop analytics dataverse.

186

187

Args:

188

dataverse_name (str): Dataverse name

189

options (DropAnalyticsDataverseOptions, optional): Drop options

190

191

Raises:

192

DataverseNotFoundException: If dataverse doesn't exist

193

"""

194

195

def create_dataset(self, dataset_name: str, bucket_name: str, options: CreateAnalyticsDatasetOptions = None) -> None:

196

"""

197

Create analytics dataset.

198

199

Args:

200

dataset_name (str): Dataset name

201

bucket_name (str): Source bucket name

202

options (CreateAnalyticsDatasetOptions, optional): Creation options

203

204

Raises:

205

DatasetExistsException: If dataset already exists

206

"""

207

208

def drop_dataset(self, dataset_name: str, options: DropAnalyticsDatasetOptions = None) -> None:

209

"""

210

Drop analytics dataset.

211

212

Args:

213

dataset_name (str): Dataset name

214

options (DropAnalyticsDatasetOptions, optional): Drop options

215

216

Raises:

217

DatasetNotFoundException: If dataset doesn't exist

218

"""

219

220

def create_index(self, index_name: str, dataset_name: str, fields: Dict[str, AnalyticsDataType], options: CreateAnalyticsIndexOptions = None) -> None:

221

"""

222

Create analytics index.

223

224

Args:

225

index_name (str): Index name

226

dataset_name (str): Dataset name

227

fields (Dict[str, AnalyticsDataType]): Index fields and types

228

options (CreateAnalyticsIndexOptions, optional): Creation options

229

230

Raises:

231

IndexExistsException: If index already exists

232

"""

233

234

def drop_index(self, index_name: str, dataset_name: str, options: DropAnalyticsIndexOptions = None) -> None:

235

"""

236

Drop analytics index.

237

238

Args:

239

index_name (str): Index name

240

dataset_name (str): Dataset name

241

options (DropAnalyticsIndexOptions, optional): Drop options

242

243

Raises:

244

IndexNotFoundException: If index doesn't exist

245

"""

246

247

def get_all_datasets(self, options: GetAllAnalyticsDatasetsOptions = None) -> List[AnalyticsDataset]:

248

"""

249

Get all analytics datasets.

250

251

Args:

252

options (GetAllAnalyticsDatasetsOptions, optional): Retrieval options

253

254

Returns:

255

List[AnalyticsDataset]: All datasets

256

"""

257

258

def get_all_indexes(self, options: GetAllAnalyticsIndexesOptions = None) -> List[AnalyticsIndex]:

259

"""

260

Get all analytics indexes.

261

262

Args:

263

options (GetAllAnalyticsIndexesOptions, optional): Retrieval options

264

265

Returns:

266

List[AnalyticsIndex]: All indexes

267

"""

268

269

def connect_link(self, options: ConnectAnalyticsLinkOptions = None) -> None:

270

"""

271

Connect analytics link.

272

273

Args:

274

options (ConnectAnalyticsLinkOptions, optional): Connection options

275

"""

276

277

def disconnect_link(self, options: DisconnectAnalyticsLinkOptions = None) -> None:

278

"""

279

Disconnect analytics link.

280

281

Args:

282

options (DisconnectAnalyticsLinkOptions, optional): Disconnection options

283

"""

284

285

def create_link(self, link: AnalyticsLink, options: CreateAnalyticsLinkOptions = None) -> None:

286

"""

287

Create external analytics link.

288

289

Args:

290

link (AnalyticsLink): Link configuration

291

options (CreateAnalyticsLinkOptions, optional): Creation options

292

293

Raises:

294

LinkExistsException: If link already exists

295

"""

296

297

def replace_link(self, link: AnalyticsLink, options: ReplaceAnalyticsLinkOptions = None) -> None:

298

"""

299

Replace external analytics link.

300

301

Args:

302

link (AnalyticsLink): New link configuration

303

options (ReplaceAnalyticsLinkOptions, optional): Replace options

304

305

Raises:

306

LinkNotFoundException: If link doesn't exist

307

"""

308

309

def drop_link(self, link_name: str, dataverse_name: str, options: DropAnalyticsLinkOptions = None) -> None:

310

"""

311

Drop external analytics link.

312

313

Args:

314

link_name (str): Link name

315

dataverse_name (str): Dataverse name

316

options (DropAnalyticsLinkOptions, optional): Drop options

317

318

Raises:

319

LinkNotFoundException: If link doesn't exist

320

"""

321

322

def get_links(self, options: GetAnalyticsLinksOptions = None) -> List[AnalyticsLink]:

323

"""

324

Get all analytics links.

325

326

Args:

327

options (GetAnalyticsLinksOptions, optional): Retrieval options

328

329

Returns:

330

List[AnalyticsLink]: All analytics links

331

"""

332

```

333

334

## Analytics Schema Types

335

336

```python { .api }

337

class AnalyticsDataset:

338

@property

339

def name(self) -> str:

340

"""Dataset name."""

341

342

@property

343

def dataverse_name(self) -> str:

344

"""Dataverse containing the dataset."""

345

346

@property

347

def link_name(self) -> str:

348

"""Link name for external datasets."""

349

350

@property

351

def bucket_name(self) -> str:

352

"""Source bucket name."""

353

354

class AnalyticsIndex:

355

@property

356

def name(self) -> str:

357

"""Index name."""

358

359

@property

360

def dataset_name(self) -> str:

361

"""Dataset containing the index."""

362

363

@property

364

def dataverse_name(self) -> str:

365

"""Dataverse containing the index."""

366

367

@property

368

def is_primary(self) -> bool:

369

"""Whether this is a primary index."""

370

371

class AnalyticsLink:

372

"""Base class for analytics links."""

373

374

class CouchbaseRemoteAnalyticsLink(AnalyticsLink):

375

def __init__(self, name: str, dataverse: str, hostname: str,

376

username: str, password: str = None,

377

encryption: AnalyticsEncryptionLevel = None):

378

"""

379

Remote Couchbase cluster link.

380

381

Args:

382

name (str): Link name

383

dataverse (str): Dataverse name

384

hostname (str): Remote cluster hostname

385

username (str): Remote cluster username

386

password (str, optional): Remote cluster password

387

encryption (AnalyticsEncryptionLevel, optional): Encryption level

388

"""

389

390

class S3ExternalAnalyticsLink(AnalyticsLink):

391

def __init__(self, name: str, dataverse: str, access_key_id: str,

392

secret_access_key: str, region: str,

393

service_endpoint: str = None):

394

"""

395

Amazon S3 external link.

396

397

Args:

398

name (str): Link name

399

dataverse (str): Dataverse name

400

access_key_id (str): AWS access key ID

401

secret_access_key (str): AWS secret access key

402

region (str): AWS region

403

service_endpoint (str, optional): Custom S3 endpoint

404

"""

405

406

class AzureBlobExternalAnalyticsLink(AnalyticsLink):

407

def __init__(self, name: str, dataverse: str, connection_string: str = None,

408

account_name: str = None, account_key: str = None,

409

shared_access_signature: str = None, blob_endpoint: str = None):

410

"""

411

Azure Blob Storage external link.

412

413

Args:

414

name (str): Link name

415

dataverse (str): Dataverse name

416

connection_string (str, optional): Azure connection string

417

account_name (str, optional): Azure account name

418

account_key (str, optional): Azure account key

419

shared_access_signature (str, optional): Azure SAS token

420

blob_endpoint (str, optional): Azure blob endpoint

421

"""

422

423

class AnalyticsDataType:

424

STRING = "string"

425

INT64 = "int64"

426

DOUBLE = "double"

427

BOOLEAN = "boolean"

428

DATETIME = "datetime"

429

UUID = "uuid"

430

431

class AnalyticsEncryptionLevel:

432

NONE = "none"

433

HALF = "half"

434

FULL = "full"

435

436

class AnalyticsLinkType:

437

COUCHBASE_REMOTE = "couchbase"

438

S3_EXTERNAL = "s3"

439

AZURE_EXTERNAL = "azureblob"

440

```

441

442

## Usage Examples

443

444

### Basic Analytics Queries

445

446

```python

447

from couchbase.options import AnalyticsOptions, AnalyticsScanConsistency

448

449

# Simple analytics query

450

query = """

451

SELECT country, COUNT(*) as hotel_count

452

FROM `travel-sample`

453

WHERE type = 'hotel'

454

GROUP BY country

455

ORDER BY hotel_count DESC

456

LIMIT 10

457

"""

458

459

result = cluster.analytics_query(query)

460

461

for row in result:

462

print(f"{row['country']}: {row['hotel_count']} hotels")

463

464

# Get metadata

465

metadata = result.metadata()

466

print(f"Query took: {metadata.metrics.elapsed_time}")

467

print(f"Processed: {metadata.metrics.processed_objects} objects")

468

```

469

470

### Parameterized Analytics Queries

471

472

```python

473

# Named parameters

474

query = """

475

SELECT name, country, reviews.ratings.Overall as rating

476

FROM `travel-sample`

477

WHERE type = $doc_type AND country = $country_name

478

AND reviews.ratings.Overall >= $min_rating

479

ORDER BY rating DESC

480

"""

481

482

options = AnalyticsOptions(

483

doc_type="hotel",

484

country_name="United States",

485

min_rating=4.0

486

)

487

488

result = cluster.analytics_query(query, options)

489

490

for row in result:

491

print(f"{row['name']} ({row['country']}): {row['rating']}")

492

493

# Positional parameters

494

query2 = """

495

SELECT AVG(reviews.ratings.Overall) as avg_rating

496

FROM `travel-sample`

497

WHERE type = ? AND country = ?

498

"""

499

500

options2 = AnalyticsOptions().positional_parameters("hotel", "France")

501

result2 = cluster.analytics_query(query2, options2)

502

503

for row in result2:

504

print(f"Average rating: {row['avg_rating']}")

505

```

506

507

### Complex Analytics Queries

508

509

```python

510

# Unnest operation: flatten each hotel's reviews array into one row per review

511

query = """

512

SELECT h.name as hotel_name, h.city, r.content as review_text, r.ratings.Overall as rating

513

FROM `travel-sample` h

514

UNNEST h.reviews r

515

WHERE h.type = 'hotel'

516

AND h.country = 'United Kingdom'

517

AND r.ratings.Overall >= 4

518

ORDER BY r.ratings.Overall DESC, h.name

519

LIMIT 20

520

"""

521

522

result = cluster.analytics_query(query)

523

524

for row in result:

525

print(f"{row['hotel_name']} ({row['city']}): {row['rating']}")

526

print(f" Review: {row['review_text'][:100]}...")

527

528

# Ranking with window functions (top 3 hotels per country)

529

query2 = """

530

SELECT country, city, name, reviews.ratings.Overall as rating,

531

ROW_NUMBER() OVER (PARTITION BY country ORDER BY reviews.ratings.Overall DESC) as rank

532

FROM `travel-sample`

533

WHERE type = 'hotel' AND reviews.ratings.Overall IS NOT MISSING

534

QUALIFY rank <= 3

535

ORDER BY country, rank

536

"""

537

538

result2 = cluster.analytics_query(query2)

539

540

current_country = None

541

for row in result2:

542

if row['country'] != current_country:

543

current_country = row['country']

544

print(f"\nTop hotels in {current_country}:")

545

print(f" {row['rank']}. {row['name']} ({row['city']}): {row['rating']}")

546

```

547

548

### Analytics with Consistency

549

550

```python

551

# Perform document updates

552

doc = {"type": "hotel", "name": "Analytics Test Hotel", "country": "TestLand"}

553

mutation_result = collection.upsert("hotel::analytics_test", doc)

554

555

# Query with consistency

556

from couchbase.mutation_state import MutationState

557

558

mutation_state = MutationState(mutation_result.mutation_token)

559

options = AnalyticsOptions(scan_consistency=AnalyticsScanConsistency.REQUEST_PLUS)

560

561

query = "SELECT * FROM `travel-sample` WHERE name = 'Analytics Test Hotel'"

562

result = cluster.analytics_query(query, options)

563

564

for row in result:

565

print(f"Found: {row['name']}")

566

```

567

568

### Analytics Index Management

569

570

```python

571

from couchbase.management.analytics import AnalyticsIndexManager, AnalyticsDataType

572

573

analytics_mgr = cluster.analytics_indexes()

574

575

# Create dataverse

576

analytics_mgr.create_dataverse("travel_analytics")

577

578

# Create dataset

579

analytics_mgr.create_dataset("hotels", "travel-sample",

580

CreateAnalyticsDatasetOptions(dataverse_name="travel_analytics"))

581

582

# Create index

583

index_fields = {

584

"country": AnalyticsDataType.STRING,

585

"city": AnalyticsDataType.STRING,

586

"rating": AnalyticsDataType.DOUBLE

587

}

588

589

analytics_mgr.create_index("hotel_location_idx", "hotels", index_fields,

590

CreateAnalyticsIndexOptions(dataverse_name="travel_analytics"))

591

592

# List all datasets

593

datasets = analytics_mgr.get_all_datasets()

594

for dataset in datasets:

595

print(f"Dataset: {dataset.name} in {dataset.dataverse_name}")

596

597

# List all indexes

598

indexes = analytics_mgr.get_all_indexes()

599

for index in indexes:

600

print(f"Index: {index.name} on {index.dataset_name}")

601

```

602

603

### External Data Links

604

605

```python

606

from couchbase.management.analytics import S3ExternalAnalyticsLink, AzureBlobExternalAnalyticsLink

607

608

analytics_mgr = cluster.analytics_indexes()

609

610

# Create S3 external link

611

s3_link = S3ExternalAnalyticsLink(

612

name="s3_data_link",

613

dataverse="external_data",

614

access_key_id="AKIAIOSFODNN7EXAMPLE",

615

secret_access_key="wJalrXUtnFEMI/K7MDENG/bPxRfiCYEXAMPLEKEY",

616

region="us-west-2"

617

)

618

619

analytics_mgr.create_link(s3_link)

620

621

# Create Azure Blob link

622

azure_link = AzureBlobExternalAnalyticsLink(

623

name="azure_data_link",

624

dataverse="external_data",

625

account_name="mystorageaccount",

626

account_key="myaccountkey"

627

)

628

629

analytics_mgr.create_link(azure_link)

630

631

# Query external data

632

external_query = """

633

SELECT *

634

FROM EXTERNAL `s3://my-bucket/data/*.json`

635

USING `s3_data_link`

636

LIMIT 10

637

"""

638

639

result = cluster.analytics_query(external_query)

640

for row in result:

641

print(row)

642

```

643

644

### Error Handling

645

646

```python

647

from couchbase.exceptions import AnalyticsException, TimeoutException

648

649

try:

650

query = "SELECT * FROM `nonexistent-bucket`"

651

result = cluster.analytics_query(query)

652

for row in result:

653

print(row)

654

except AnalyticsException as e:

655

print(f"Analytics query failed: {e}")

656

if hasattr(e, 'context'):

657

print(f"Query: {e.context.statement}")

658

print(f"Error details: {e.context.errors}")

659

except TimeoutException:

660

print("Analytics query timed out")

661

662

# Handle warnings

663

try:

664

query = "SELECT * FROM `travel-sample` WHERE deprecated_field IS NOT MISSING"

665

result = cluster.analytics_query(query)

666

667

metadata = result.metadata()

668

if metadata.warnings:

669

for warning in metadata.warnings:

670

print(f"Warning {warning.code}: {warning.message}")

671

672

for row in result:

673

print(row)

674

except AnalyticsException as e:

675

print(f"Analytics error: {e}")

676

```

677

678

### Performance Monitoring

679

680

```python

681

# Query with detailed metrics

682

options = AnalyticsOptions(

683

client_context_id="performance_test_001",

684

priority=True # High priority query

685

)

686

687

query = """

688

SELECT country, AVG(reviews.ratings.Overall) as avg_rating,

689

COUNT(*) as hotel_count

690

FROM `travel-sample`

691

WHERE type = 'hotel' AND reviews.ratings.Overall IS NOT MISSING

692

GROUP BY country

693

HAVING COUNT(*) >= 10

694

ORDER BY avg_rating DESC

695

"""

696

697

result = cluster.analytics_query(query, options)

698

699

# Process results

700

results_list = []

701

for row in result:

702

results_list.append(row)

703

704

# Analyze performance

705

metadata = result.metadata()

706

metrics = metadata.metrics

707

708

print(f"Analytics Performance Report:")

709

print(f" Request ID: {metadata.request_id}")

710

print(f" Total Time: {metrics.elapsed_time}")

711

print(f" Execution Time: {metrics.execution_time}")

712

print(f" Objects Processed: {metrics.processed_objects}")

713

print(f" Result Count: {metrics.result_count}")

714

print(f" Result Size: {metrics.result_size} bytes")

715

print(f" Warnings: {metrics.warning_count}")

716

print(f" Errors: {metrics.error_count}")

717

718

print(f"\nTop 5 Countries by Hotel Rating:")

719

for i, row in enumerate(results_list[:5]):

720

print(f" {i+1}. {row['country']}: {row['avg_rating']:.2f} ({row['hotel_count']} hotels)")

721

```