or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

bulk-operations.md client.md facets-aggregations.md filters.md index.md mappings.md query-dsl.md rivers.md

docs/filters.md

0

# PyES Filter DSL

1

2

## Overview

3

4

The PyES Filter DSL provides fast, non-scored document filtering capabilities. Filters are executed in filter context, which means they don't affect document scores and are cached by ElasticSearch for better performance. Use filters when you need fast yes/no matching without relevance scoring.

5

6

## Base Filter Classes

7

8

### Filter Base Class

9

10

```python { .api }

11

class Filter:

12

"""

13

Base class for all ElasticSearch filters.

14

15

All filter classes inherit from this base class and implement

16

the serialize() method to convert to ElasticSearch JSON.

17

"""

18

19

def serialize(self):

20

"""

21

Convert filter to ElasticSearch JSON format.

22

23

Returns:

24

dict: ElasticSearch filter JSON

25

"""

26

pass

27

```

28

29

### FilterList Container

30

31

```python { .api }

32

class FilterList:

33

"""

34

Container for multiple filters that can be combined.

35

"""

36

37

def __init__(self, filters=None):

38

"""

39

Initialize FilterList.

40

41

Args:

42

filters (list, optional): Initial list of filters

43

"""

44

pass

45

46

def add(self, filter):

47

"""

48

Add filter to the list.

49

50

Args:

51

filter (Filter): Filter to add

52

"""

53

pass

54

```

55

56

## Logical Filters

57

58

### Boolean Filter

59

60

```python { .api }

61

class BoolFilter(Filter):

62

"""

63

Boolean combination of filters with must/must_not/should clauses.

64

65

Provides full boolean logic for complex filtering requirements.

66

"""

67

68

def __init__(self, must=None, must_not=None, should=None, **kwargs):

69

"""

70

Initialize BoolFilter.

71

72

Args:

73

must (Filter|list): Filters that must match (AND logic)

74

must_not (Filter|list): Filters that must not match (NOT logic)

75

should (Filter|list): Filters that should match (OR logic)

76

**kwargs: Additional boolean filter parameters

77

"""

78

pass

79

80

# Complex boolean filtering

81

from pyes import BoolFilter, TermFilter, RangeFilter, ExistsFilter

82

83

# Documents that must be published, must not be drafts,

84

# and should be either featured or have high views

85

bool_filter = BoolFilter(

86

must=[

87

TermFilter("status", "published"),

88

ExistsFilter("content")

89

],

90

must_not=[

91

TermFilter("category", "draft"),

92

TermFilter("author", "spam_user")

93

],

94

should=[

95

TermFilter("featured", True),

96

RangeFilter("view_count", gte=1000)

97

]

98

)

99

100

# Apply filter to search

101

from pyes import Search, MatchAllQuery

102

search = Search(MatchAllQuery()).filter(bool_filter)

103

results = es.search(search, indices=["blog"])

104

```

105

106

### AND Filter

107

108

```python { .api }

109

class ANDFilter(Filter):

110

"""

111

Logical AND combination of multiple filters.

112

113

All filters must match for document to pass.

114

"""

115

116

def __init__(self, filters):

117

"""

118

Initialize ANDFilter.

119

120

Args:

121

filters (list): List of filters to combine with AND logic

122

"""

123

pass

124

125

# AND filter combination

126

from pyes import ANDFilter, TermFilter, RangeFilter

127

128

# Must match all conditions

129

and_filter = ANDFilter([

130

TermFilter("status", "published"),

131

TermFilter("category", "tutorial"),

132

RangeFilter("published_date", gte="2023-01-01")

133

])

134

```

135

136

### OR Filter

137

138

```python { .api }

139

class ORFilter(Filter):

140

"""

141

Logical OR combination of multiple filters.

142

143

At least one filter must match for document to pass.

144

"""

145

146

def __init__(self, filters):

147

"""

148

Initialize ORFilter.

149

150

Args:

151

filters (list): List of filters to combine with OR logic

152

"""

153

pass

154

155

# OR filter combination

156

from pyes import ORFilter, TermFilter

157

158

# Match any of these categories

159

category_filter = ORFilter([

160

TermFilter("category", "tutorial"),

161

TermFilter("category", "guide"),

162

TermFilter("category", "reference")

163

])

164

```

165

166

### NOT Filter

167

168

```python { .api }

169

class NotFilter(Filter):

170

"""

171

Logical negation of a filter.

172

173

Documents that do NOT match the wrapped filter pass.

174

"""

175

176

def __init__(self, filter):

177

"""

178

Initialize NotFilter.

179

180

Args:

181

filter (Filter): Filter to negate

182

"""

183

pass

184

185

# NOT filter negation

186

from pyes import NotFilter, TermFilter

187

188

# Exclude draft documents

189

not_draft_filter = NotFilter(TermFilter("status", "draft"))

190

```

191

192

## Term-Based Filters

193

194

### Term Filter

195

196

```python { .api }

197

class TermFilter(Filter):

198

"""

199

Filter for exact term matching (not analyzed).

200

201

Use for keyword fields, IDs, status values, and exact matches.

202

"""

203

204

def __init__(self, field, value, **kwargs):

205

"""

206

Initialize TermFilter.

207

208

Args:

209

field (str): Field name

210

value (str|int|float|bool): Exact value to match

211

**kwargs: Additional term filter parameters

212

"""

213

pass

214

215

# Exact term filtering

216

from pyes import TermFilter

217

218

# Filter by exact status

219

status_filter = TermFilter("status", "published")

220

221

# Filter by numeric value

222

view_filter = TermFilter("view_count", 1000)

223

224

# Filter by boolean value

225

featured_filter = TermFilter("featured", True)

226

227

# Filter by keyword field

228

category_filter = TermFilter("category.keyword", "Python Tutorial")

229

```

230

231

### Terms Filter

232

233

```python { .api }

234

class TermsFilter(Filter):

235

"""

236

Filter for matching any of multiple exact terms.

237

238

Equivalent to multiple TermFilter with OR logic.

239

"""

240

241

def __init__(self, field, values, **kwargs):

242

"""

243

Initialize TermsFilter.

244

245

Args:

246

field (str): Field name

247

values (list): List of exact values to match

248

**kwargs: Additional terms filter parameters

249

"""

250

pass

251

252

# Multiple value filtering

253

from pyes import TermsFilter

254

255

# Match multiple categories

256

categories_filter = TermsFilter("category", ["tutorial", "guide", "reference"])

257

258

# Match multiple authors

259

authors_filter = TermsFilter("author.keyword", ["john_doe", "jane_smith", "bob_wilson"])

260

261

# Match multiple IDs

262

ids_filter = TermsFilter("_id", ["doc1", "doc2", "doc3"])

263

```

264

265

### Prefix Filter

266

267

```python { .api }

268

class PrefixFilter(Filter):

269

"""

270

Filter documents with terms that start with specified prefix.

271

"""

272

273

def __init__(self, field, prefix, **kwargs):

274

"""

275

Initialize PrefixFilter.

276

277

Args:

278

field (str): Field name

279

prefix (str): Prefix to match

280

**kwargs: Additional prefix filter parameters

281

"""

282

pass

283

284

# Prefix matching

285

from pyes import PrefixFilter

286

287

# Filter titles starting with "Python"

288

title_prefix_filter = PrefixFilter("title.keyword", "Python")

289

290

# Filter tags starting with "elastic"

291

tag_prefix_filter = PrefixFilter("tags", "elastic")

292

293

# Filter file paths starting with "/home/user"

294

path_prefix_filter = PrefixFilter("file_path", "/home/user")

295

```

296

297

### Regular Expression Filter

298

299

```python { .api }

300

class RegexTermFilter(Filter):

301

"""

302

Filter using regular expression pattern matching.

303

"""

304

305

def __init__(self, field, regex, **kwargs):

306

"""

307

Initialize RegexTermFilter.

308

309

Args:

310

field (str): Field name

311

regex (str): Regular expression pattern

312

**kwargs: Additional regex filter parameters

313

"""

314

pass

315

316

# Regular expression filtering

317

from pyes import RegexTermFilter

318

319

# Filter email addresses

320

email_filter = RegexTermFilter("email", r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")

321

322

# Filter version numbers

323

version_filter = RegexTermFilter("version", r"[0-9]+\.[0-9]+\.[0-9]+")

324

325

# Filter phone numbers

326

phone_filter = RegexTermFilter("phone", r"\d{3}-\d{3}-\d{4}")

327

```

328

329

## Field Existence Filters

330

331

### Exists Filter

332

333

```python { .api }

334

class ExistsFilter(Filter):

335

"""

336

Filter documents where specified field exists and has a value.

337

"""

338

339

def __init__(self, field, **kwargs):

340

"""

341

Initialize ExistsFilter.

342

343

Args:

344

field (str): Field name to check for existence

345

**kwargs: Additional exists filter parameters

346

"""

347

pass

348

349

# Field existence filtering

350

from pyes import ExistsFilter

351

352

# Documents with email field

353

has_email_filter = ExistsFilter("email")

354

355

# Documents with thumbnail image

356

has_thumbnail_filter = ExistsFilter("thumbnail_url")

357

358

# Documents with geolocation

359

has_location_filter = ExistsFilter("location")

360

```

361

362

### Missing Filter

363

364

```python { .api }

365

class MissingFilter(Filter):

366

"""

367

Filter documents where specified field is missing or null.

368

"""

369

370

def __init__(self, field, **kwargs):

371

"""

372

Initialize MissingFilter.

373

374

Args:

375

field (str): Field name to check for absence

376

**kwargs: Additional missing filter parameters

377

"""

378

pass

379

380

# Field absence filtering

381

from pyes import MissingFilter

382

383

# Documents without email

384

no_email_filter = MissingFilter("email")

385

386

# Documents without tags

387

no_tags_filter = MissingFilter("tags")

388

389

# Documents without expiration date

390

no_expiry_filter = MissingFilter("expires_at")

391

```

392

393

## Range Filters

394

395

### Range Filter

396

397

```python { .api }

398

class RangeFilter(Filter):

399

"""

400

Filter for range-based matching (numeric, date, or string ranges).

401

402

More efficient than RangeQuery when scoring is not needed.

403

"""

404

405

def __init__(self, field, from_value=None, to_value=None,

406

include_lower=True, include_upper=True, **kwargs):

407

"""

408

Initialize RangeFilter.

409

410

Args:

411

field (str): Field name

412

from_value: Lower bound value (use gte/gt kwargs instead)

413

to_value: Upper bound value (use lte/lt kwargs instead)

414

include_lower (bool): Include lower bound. Default: True

415

include_upper (bool): Include upper bound. Default: True

416

**kwargs: Range parameters (gte, gt, lte, lt)

417

"""

418

pass

419

420

# Range filtering

421

from pyes import RangeFilter

422

423

# Date range filtering

424

date_filter = RangeFilter("published_date", gte="2023-01-01", lt="2024-01-01")

425

426

# Numeric range filtering

427

view_count_filter = RangeFilter("view_count", gte=100, lte=10000)

428

429

# Price range filtering

430

price_filter = RangeFilter("price", gte=10.00, lt=100.00)

431

432

# Age range filtering (exclusive bounds)

433

age_filter = RangeFilter("age", gt=18, lt=65)

434

435

# Score range filtering

436

score_filter = RangeFilter("rating", gte=4.0)

437

```

438

439

### Limit Filter

440

441

```python { .api }

442

class LimitFilter(Filter):

443

"""

444

Limit the number of documents processed by subsequent filters.

445

446

Useful for performance optimization with large datasets.

447

"""

448

449

def __init__(self, value, **kwargs):

450

"""

451

Initialize LimitFilter.

452

453

Args:

454

value (int): Maximum number of documents to process

455

**kwargs: Additional limit filter parameters

456

"""

457

pass

458

459

# Limit processing for performance

460

from pyes import LimitFilter

461

462

# Only process first 1000 documents

463

limit_filter = LimitFilter(1000)

464

```

465

466

## Geospatial Filters

467

468

### Geo Distance Filter

469

470

```python { .api }

471

class GeoDistanceFilter(Filter):

472

"""

473

Filter documents within specified distance from a point.

474

"""

475

476

def __init__(self, field=None, location=None, distance=None, **kwargs):

477

"""

478

Initialize GeoDistanceFilter.

479

480

Args:

481

field (str): Geo-point field name (can be specified in kwargs as field name)

482

location (dict): Point location {"lat": lat, "lon": lon}

483

distance (str): Distance with unit (e.g., "5km", "10mi")

484

**kwargs: Can specify location as field_name=location

485

"""

486

pass

487

488

# Geographic distance filtering

489

from pyes import GeoDistanceFilter

490

491

# Within 5km of specific coordinates

492

geo_filter = GeoDistanceFilter(

493

location={"lat": 40.7128, "lon": -74.0060},

494

distance="5km"

495

)

496

497

# Alternative syntax with field name

498

location_filter = GeoDistanceFilter(

499

restaurant_location={"lat": 40.7128, "lon": -74.0060},

500

distance="2mi"

501

)

502

503

# Within walking distance

504

walking_filter = GeoDistanceFilter(

505

store_location={"lat": 37.7749, "lon": -122.4194},

506

distance="500m"

507

)

508

```

509

510

### Geo Bounding Box Filter

511

512

```python { .api }

513

class GeoBoundingBoxFilter(Filter):

514

"""

515

Filter documents within a geographic bounding box.

516

"""

517

518

def __init__(self, field, **kwargs):

519

"""

520

Initialize GeoBoundingBoxFilter.

521

522

Args:

523

field (str): Geo-point field name

524

**kwargs: Bounding box coordinates (top_left, bottom_right or

525

top, left, bottom, right)

526

"""

527

pass

528

529

# Bounding box filtering

530

from pyes import GeoBoundingBoxFilter

531

532

# Documents within NYC area

533

nyc_filter = GeoBoundingBoxFilter(

534

"location",

535

top_left={"lat": 40.8176, "lon": -74.0431},

536

bottom_right={"lat": 40.4774, "lon": -73.7004}

537

)

538

539

# Alternative coordinate specification

540

area_filter = GeoBoundingBoxFilter(

541

"coordinates",

542

top=40.8176,

543

left=-74.0431,

544

bottom=40.4774,

545

right=-73.7004

546

)

547

```

548

549

### Geo Polygon Filter

550

551

```python { .api }

552

class GeoPolygonFilter(Filter):

553

"""

554

Filter documents within a geographic polygon.

555

"""

556

557

def __init__(self, field, points, **kwargs):

558

"""

559

Initialize GeoPolygonFilter.

560

561

Args:

562

field (str): Geo-point field name

563

points (list): List of polygon vertices as {"lat": lat, "lon": lon}

564

**kwargs: Additional geo polygon parameters

565

"""

566

pass

567

568

# Polygon area filtering

569

from pyes import GeoPolygonFilter

570

571

# Custom polygon area

572

polygon_filter = GeoPolygonFilter(

573

"location",

574

points=[

575

{"lat": 40.7589, "lon": -73.9851}, # Times Square

576

{"lat": 40.7505, "lon": -73.9934}, # Penn Station

577

{"lat": 40.7282, "lon": -74.0776}, # World Trade Center

578

{"lat": 40.7614, "lon": -73.9776} # Central Park

579

]

580

)

581

```

582

583

### Geo Shape Filter

584

585

```python { .api }

586

class GeoShapeFilter(Filter):

587

"""

588

Filter documents using complex geographic shapes.

589

"""

590

591

def __init__(self, field, shape=None, relation="intersects", **kwargs):

592

"""

593

Initialize GeoShapeFilter.

594

595

Args:

596

field (str): Geo-shape field name

597

shape (dict): GeoJSON shape definition

598

relation (str): Spatial relation (intersects, disjoint, within, contains)

599

**kwargs: Additional geo shape parameters

600

"""

601

pass

602

603

# Complex geo shape filtering

604

from pyes import GeoShapeFilter

605

606

# Circle shape

607

circle_filter = GeoShapeFilter(

608

"region",

609

shape={

610

"type": "circle",

611

"coordinates": [-73.9857, 40.7484],

612

"radius": "1km"

613

},

614

relation="intersects"

615

)

616

617

# Polygon shape

618

polygon_shape_filter = GeoShapeFilter(

619

"boundary",

620

shape={

621

"type": "polygon",

622

"coordinates": [[

623

[-74.0059, 40.7128],

624

[-74.0059, 40.7589],

625

[-73.9352, 40.7589],

626

[-73.9352, 40.7128],

627

[-74.0059, 40.7128]

628

]]

629

}

630

)

631

```

632

633

### Geo Indexed Shape Filter

634

635

```python { .api }

636

class GeoIndexedShapeFilter(Filter):

637

"""

638

Filter using pre-indexed shapes stored in ElasticSearch.

639

"""

640

641

def __init__(self, field, indexed_shape_index, indexed_shape_type,

642

indexed_shape_id, indexed_shape_path=None, relation="intersects", **kwargs):

643

"""

644

Initialize GeoIndexedShapeFilter.

645

646

Args:

647

field (str): Geo-shape field name

648

indexed_shape_index (str): Index containing the shape

649

indexed_shape_type (str): Type of the shape document

650

indexed_shape_id (str): ID of the shape document

651

indexed_shape_path (str, optional): Path to shape in document

652

relation (str): Spatial relation. Default: "intersects"

653

**kwargs: Additional parameters

654

"""

655

pass

656

657

# Use pre-indexed shapes

658

from pyes import GeoIndexedShapeFilter

659

660

# Filter using stored city boundary

661

city_filter = GeoIndexedShapeFilter(

662

"location",

663

indexed_shape_index="shapes",

664

indexed_shape_type="boundary",

665

indexed_shape_id="nyc_boundary",

666

relation="within"

667

)

668

```

669

670

## Document Structure Filters

671

672

### Has Child Filter

673

674

```python { .api }

675

class HasChildFilter(Filter):

676

"""

677

Filter parent documents that have child documents matching criteria.

678

"""

679

680

def __init__(self, type, filter=None, query=None, **kwargs):

681

"""

682

Initialize HasChildFilter.

683

684

Args:

685

type (str): Child document type

686

filter (Filter, optional): Filter for child documents

687

query (Query, optional): Query for child documents

688

**kwargs: Additional has_child parameters

689

"""

690

pass

691

692

# Parent-child filtering

693

# RangeFilter is used by the second example below, so it is imported here too.
from pyes import HasChildFilter, RangeFilter, TermFilter

# Blog posts with approved comments
posts_with_approved_comments = HasChildFilter(
    type="comment",
    filter=TermFilter("status", "approved"),
)

# Products with variants in stock
products_in_stock = HasChildFilter(
    type="variant",
    filter=RangeFilter("stock_quantity", gt=0),
)

706

```

707

708

### Has Parent Filter

709

710

```python { .api }

711

class HasParentFilter(Filter):

712

"""

713

Filter child documents that have parent documents matching criteria.

714

"""

715

716

def __init__(self, parent_type, filter=None, query=None, **kwargs):

717

"""

718

Initialize HasParentFilter.

719

720

Args:

721

parent_type (str): Parent document type

722

filter (Filter, optional): Filter for parent documents

723

query (Query, optional): Query for parent documents

724

**kwargs: Additional has_parent parameters

725

"""

726

pass

727

728

# Child document filtering based on parent

729

# TermFilter and RangeFilter build the parent-side criteria used below.
from pyes import HasParentFilter, RangeFilter, TermFilter

# Comments on featured posts
comments_on_featured = HasParentFilter(
    parent_type="post",
    filter=TermFilter("featured", True),
)

# Variants of products on sale
variants_on_sale = HasParentFilter(
    parent_type="product",
    filter=RangeFilter("discount_percent", gt=0),
)

742

```

743

744

### Nested Filter

745

746

```python { .api }

747

class NestedFilter(Filter):

748

"""

749

Filter nested objects within documents.

750

"""

751

752

def __init__(self, path, filter=None, query=None, **kwargs):

753

"""

754

Initialize NestedFilter.

755

756

Args:

757

path (str): Path to nested object

758

filter (Filter, optional): Filter for nested objects

759

query (Query, optional): Query for nested objects

760

**kwargs: Additional nested parameters

761

"""

762

pass

763

764

# Nested object filtering

765

# TermFilter and RangeFilter are the clauses applied to each nested object.
from pyes import BoolFilter, NestedFilter, RangeFilter, TermFilter

# Products with red variants under $100
nested_filter = NestedFilter(
    path="variants",
    filter=BoolFilter(
        must=[
            TermFilter("variants.color", "red"),
            RangeFilter("variants.price", lt=100),
        ]
    ),
)

777

```

778

779

### Type Filter

780

781

```python { .api }

782

class TypeFilter(Filter):

783

"""

784

Filter documents by their document type.

785

"""

786

787

def __init__(self, type, **kwargs):

788

"""

789

Initialize TypeFilter.

790

791

Args:

792

type (str): Document type to filter by

793

**kwargs: Additional type filter parameters

794

"""

795

pass

796

797

# Document type filtering

798

from pyes import TypeFilter

799

800

# Only blog post documents

801

posts_only = TypeFilter("blog_post")

802

803

# Only user documents

804

users_only = TypeFilter("user")

805

```

806

807

### IDs Filter

808

809

```python { .api }

810

class IdsFilter(Filter):

811

"""

812

Filter documents by their IDs.

813

"""

814

815

def __init__(self, values, type=None, **kwargs):

816

"""

817

Initialize IdsFilter.

818

819

Args:

820

values (list): List of document IDs

821

type (str|list, optional): Document type(s) to search in

822

**kwargs: Additional IDs filter parameters

823

"""

824

pass

825

826

# ID-based filtering

827

from pyes import IdsFilter

828

829

# Specific document IDs

830

specific_docs = IdsFilter(["doc1", "doc2", "doc3"])

831

832

# IDs within specific types

833

typed_ids = IdsFilter(["post1", "post2"], type="blog_post")

834

```

835

836

## Specialized Filters

837

838

### Query Filter

839

840

```python { .api }

841

class QueryFilter(Filter):

842

"""

843

Wrap a query as a filter (query in filter context).

844

845

Converts scoring queries to non-scoring filters.

846

"""

847

848

def __init__(self, query, **kwargs):

849

"""

850

Initialize QueryFilter.

851

852

Args:

853

query (Query): Query to use as filter

854

**kwargs: Additional query filter parameters

855

"""

856

pass

857

858

# Query as filter

859

from pyes import QueryFilter, MatchQuery

860

861

# Use text search as filter (no scoring)

862

text_filter = QueryFilter(MatchQuery("content", "python elasticsearch"))

863

```

864

865

### Script Filter

866

867

```python { .api }

868

class ScriptFilter(Filter):

869

"""

870

Filter using custom scripts for complex logic.

871

"""

872

873

def __init__(self, script, lang="mvel", params=None, **kwargs):

874

"""

875

Initialize ScriptFilter.

876

877

Args:

878

script (str): Script code

879

lang (str): Script language. Default: "mvel"

880

params (dict, optional): Script parameters

881

**kwargs: Additional script filter parameters

882

"""

883

pass

884

885

# Script-based filtering

886

from pyes import ScriptFilter

887

888

# Custom calculation filter

889

custom_filter = ScriptFilter(

890

script="doc['price'].value * doc['quantity'].value > params.min_total",

891

params={"min_total": 100}

892

)

893

894

# Date calculation filter

895

date_filter = ScriptFilter(

896

script="(System.currentTimeMillis() - doc['created_date'].value) < params.max_age_ms",

897

params={"max_age_ms": 86400000} # 24 hours

898

)

899

```

900

901

### Match All Filter

902

903

```python { .api }

904

class MatchAllFilter(Filter):

905

"""

906

Filter that matches all documents (passes everything through).

907

"""

908

909

def __init__(self, **kwargs):

910

"""

911

Initialize MatchAllFilter.

912

913

Args:

914

**kwargs: Additional parameters

915

"""

916

pass

917

918

# Match everything filter

919

from pyes import MatchAllFilter

920

921

# Pass all documents (useful as base filter)

922

all_filter = MatchAllFilter()

923

```

924

925

### Raw Filter

926

927

```python { .api }

928

class RawFilter(Filter):

929

"""

930

Raw ElasticSearch filter JSON for custom or unsupported filters.

931

"""

932

933

def __init__(self, filter_dict):

934

"""

935

Initialize RawFilter.

936

937

Args:

938

filter_dict (dict): Raw ElasticSearch filter JSON

939

"""

940

pass

941

942

# Raw filter for custom ElasticSearch functionality

943

from pyes import RawFilter

944

945

# Custom filter not supported by PyES

946

custom_raw_filter = RawFilter({

947

"custom_filter": {

948

"field": "special_field",

949

"parameters": {"custom_param": "value"}

950

}

951

})

952

```

953

954

## Filter Combination Patterns

955

956

### Complex Filter Composition

957

958

```python { .api }

959

# Build complex filters programmatically

960

def build_content_filter(categories=None, date_range=None, author=None,
                         location=None, min_rating=None, has_images=False):
    """
    Build a content filter from whichever optional criteria are supplied.

    Every argument is optional; a falsy value (None, empty collection, 0,
    False) means "do not filter on this criterion".

    Args:
        categories (str|list|tuple|set, optional): A single category, or a
            collection of categories of which any one may match.
        date_range (dict, optional): Keyword arguments forwarded to
            RangeFilter on "published_date", e.g. {"gte": "2023-01-01"}.
        author (str, optional): Exact author, matched on "author.keyword".
        location (dict, optional): Keyword arguments forwarded to
            GeoDistanceFilter.
        min_rating (int|float, optional): Inclusive lower bound on "rating".
            Note that 0 is falsy and therefore adds no rating filter.
        has_images (bool): When true, require the "images" field to exist.

    Returns:
        Filter: MatchAllFilter() when no criterion is active, the bare filter
        when exactly one is, otherwise BoolFilter(must=[...]) over all of them.
    """
    filters = []

    # Category filtering: any collection goes to TermsFilter, a scalar to
    # TermFilter. Tuples and sets are accepted as well as lists so they are
    # not mistaken for a single term value.
    if categories:
        if isinstance(categories, (list, tuple, set, frozenset)):
            filters.append(TermsFilter("category", list(categories)))
        else:
            filters.append(TermFilter("category", categories))

    # Date range filtering
    if date_range:
        filters.append(RangeFilter("published_date", **date_range))

    # Author filtering
    if author:
        filters.append(TermFilter("author.keyword", author))

    # Geographic filtering
    if location:
        filters.append(GeoDistanceFilter(**location))

    # Rating filtering
    if min_rating:
        filters.append(RangeFilter("rating", gte=min_rating))

    # Image requirement
    if has_images:
        filters.append(ExistsFilter("images"))

    # Combine: avoid wrapping zero or one filter in a BoolFilter.
    if not filters:
        return MatchAllFilter()
    if len(filters) == 1:
        return filters[0]
    return BoolFilter(must=filters)

1002

1003

# Usage examples

1004

# build_content_filter looks these filter classes up when it is called, so
# they must be imported before the calls below.
from pyes import (
    BoolFilter,
    ExistsFilter,
    GeoDistanceFilter,
    MatchAllFilter,
    MatchAllQuery,
    RangeFilter,
    Search,
    TermFilter,
    TermsFilter,
)

# Restaurant search with multiple criteria
restaurant_filter = build_content_filter(
    categories=["restaurant", "cafe"],
    location={
        "location": {"lat": 40.7128, "lon": -74.0060},
        "distance": "2km",
    },
    min_rating=4.0,
    has_images=True,
)

search = Search(MatchAllQuery()).filter(restaurant_filter)
# NOTE: `es` is not created in this snippet; it is assumed to be an existing
# PyES connection object.
results = es.search(search, indices=["places"])

# Blog content filtering
blog_filter = build_content_filter(
    categories="tutorial",
    date_range={"gte": "2023-01-01"},
    author="expert_author",
)

blog_search = Search(MatchAllQuery()).filter(blog_filter)

1028

```

1029

1030

### Performance Optimization with Filters

1031

1032

```python { .api }

1033

# Optimize filter performance with proper ordering

1034

def optimize_filter_performance(filters):
    """
    Order filters by the evaluation cost of their type, cheapest first.

    Filters are assigned to one of three cost tiers by class name:
        1. cheap: term, terms, range, exists, missing
        2. medium: prefix, wildcard, regex
        3. expensive: everything else (e.g. geo, script, nested)

    The relative order of filters inside a tier is preserved. Selectivity is
    not taken into account; if it matters, pass the most selective filter
    first so it stays first within its tier.

    Args:
        filters (iterable): Filter instances to reorder.

    Returns:
        list: New list containing the same filters, cheapest tier first.
    """
    cheap_types = {'TermFilter', 'TermsFilter', 'RangeFilter',
                   'ExistsFilter', 'MissingFilter'}
    medium_types = {'PrefixFilter', 'WildcardFilter', 'RegexTermFilter'}

    def cost_tier(f):
        filter_type = f.__class__.__name__
        if filter_type in cheap_types:
            return 0
        if filter_type in medium_types:
            return 1
        return 2  # any unrecognised type is treated as expensive

    # sorted() is stable, so filters keep their input order within a tier.
    return sorted(filters, key=cost_tier)

1059

1060

# Example usage

1061

# The filter classes instantiated below must be imported for the example to run.
from pyes import (
    BoolFilter,
    ExistsFilter,
    GeoDistanceFilter,
    RangeFilter,
    ScriptFilter,
    TermFilter,
)

filters = [
    ScriptFilter("complex_calculation()"),  # Expensive
    TermFilter("status", "published"),  # Cheap
    GeoDistanceFilter(location={"lat": 40, "lon": -74}, distance="5km"),  # Expensive
    RangeFilter("published_date", gte="2023-01-01"),  # Cheap
    ExistsFilter("content"),  # Cheap
]

optimized_filters = optimize_filter_performance(filters)
final_filter = BoolFilter(must=optimized_filters)

1071

```

1072

1073

### Caching and Performance

1074

1075

```python { .api }

1076

# Leverage ElasticSearch filter caching

1077

# ExistsFilter and TermsFilter are used by the helper functions further down
# in this snippet, so they are imported alongside the others.
from pyes import BoolFilter, ExistsFilter, RangeFilter, TermFilter, TermsFilter

# Filters are automatically cached by ElasticSearch
# Design filters for optimal caching:

# 1. Use consistent filter values
status_filter = TermFilter("status", "published")  # Will be cached

1084

1085

# 2. Separate dynamic and static parts

1086

def build_cached_filter(dynamic_date):
    """
    Build a filter that keeps its constant part separate from its varying part.

    Args:
        dynamic_date: Lower bound (inclusive) for "created_date"; this is the
            only part of the filter that changes between calls.

    Returns:
        BoolFilter: Requires both the static clause group and the date range.
    """
    # Static filters (highly cacheable): identical on every call.
    static_filter = BoolFilter(
        must=[
            TermFilter("status", "published"),
            TermFilter("category", "article"),
            ExistsFilter("content"),
        ]
    )

    # Dynamic filter (less cacheable): depends on the caller's date.
    dynamic_filter = RangeFilter("created_date", gte=dynamic_date)

    # BoolFilter only documents must / must_not / should clauses, so the
    # dynamic part is added as a second `must` clause rather than through an
    # undocumented `filter=` keyword.
    return BoolFilter(must=[static_filter, dynamic_filter])

1106

1107

# 3. Use filters in consistent order

1108

def consistent_multi_filter(tags, min_views):
    """
    Combine tag and view-count criteria in a fixed, repeatable order.

    Args:
        tags: Collection of tag values; skipped when empty or None.
        min_views: Inclusive lower bound on "view_count"; skipped when falsy.

    Returns:
        BoolFilter: `must` holds the tags clause (if any) followed by the
        view-count clause (if any).
    """
    # Tags are sorted so the same set of tags always yields the same clause.
    tag_clause = [TermsFilter("tags", sorted(tags))] if tags else []
    views_clause = [RangeFilter("view_count", gte=min_views)] if min_views else []

    # Clause order is always: tags first, then view count.
    return BoolFilter(must=tag_clause + views_clause)

1121

```

1122

1123

The PyES Filter DSL provides comprehensive, high-performance filtering capabilities with full support for logical combinations, geospatial operations, document relationships, and complex filtering scenarios while maintaining ElasticSearch's filter caching benefits.