# Element Search

Find elements using tag names, attributes, text content, CSS selectors, and custom matching functions. Beautiful Soup provides flexible search capabilities with both single and multiple result options, supporting various criteria types for precise element selection.

## Capabilities

### Basic Search Methods

Find elements in the parse tree using tag names, attributes, and text content.

```python { .api }
def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs):
    """
    Find the first element matching the given criteria.

    Parameters:
    - name: str, list, regex, callable, or True - tag name filter
    - attrs: dict - attribute filters
    - recursive: bool - search descendants (True) or direct children only (False)
    - text: str, list, regex, callable, or True - text content filter
    - **kwargs: attribute filters as keyword arguments

    Returns:
    PageElement or None if no match found
    """

def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs):
    """
    Find all elements matching the given criteria.

    Parameters:
    - name: str, list, regex, callable, or True - tag name filter
    - attrs: dict - attribute filters
    - recursive: bool - search descendants (True) or direct children only (False)
    - text: str, list, regex, callable, or True - text content filter
    - limit: int - maximum number of results to return
    - **kwargs: attribute filters as keyword arguments

    Returns:
    ResultSet (list-like) of matching PageElement instances
    """

# Convenience method - equivalent to find_all
def __call__(self, *args, **kwargs):
    """Equivalent to find_all(*args, **kwargs)"""
```

Usage Examples:

```python
from bs4 import BeautifulSoup
import re

html = '''
<html>
<body>
<div class="container">
<p id="intro">Introduction text</p>
<p class="content">Main content</p>
<a href="http://example.com">External link</a>
<a href="/internal">Internal link</a>
</div>
</body>
</html>
'''

soup = BeautifulSoup(html, 'html.parser')

# Find by tag name
first_p = soup.find('p')
all_ps = soup.find_all('p')

# Find by attributes
intro = soup.find('p', id='intro')
content = soup.find('p', class_='content')
external_link = soup.find('a', href='http://example.com')

# Find with attribute dictionary
intro = soup.find('p', attrs={'id': 'intro'})

# Find by multiple attributes
# (no results in this example, but shows syntax)
result = soup.find('p', {'class': 'content', 'id': 'special'})

# Find with regex patterns
external_links = soup.find_all('a', href=re.compile(r'^http'))
internal_links = soup.find_all('a', href=re.compile(r'^/'))

# Find with callable
def has_class(tag):
    return tag.has_attr('class')

elements_with_class = soup.find_all(has_class)

# Limit results
first_two_links = soup.find_all('a', limit=2)

# Search direct children only
container = soup.find('div', class_='container')
direct_children = container.find_all('p', recursive=False)
```

### CSS Selector Search

Use CSS selector syntax for complex element selection.

```python { .api }
def select(self, selector):
    """
    Find all elements matching a CSS selector.

    Parameters:
    - selector: str - CSS selector string

    Returns:
    ResultSet of matching elements

    Supported selectors:
    - Tag names: 'p', 'div'
    - IDs: '#myid'
    - Classes: '.myclass'
    - Attributes: '[href]', '[href="value"]'
    - Pseudo-classes: ':first-child', ':nth-of-type(n)'
    - Combinators: 'div > p', 'div p', 'div + p', 'div ~ p'
    """

# Note: select_one() method was added in later versions of Beautiful Soup
# For version 4.3.2, use select(selector)[0] or select(selector)[:1] for first match
```

Usage Examples:

```python
html = '''
<div class="container">
<h1 id="title">Page Title</h1>
<div class="content">
<p class="intro">Introduction</p>
<p>Regular paragraph</p>
<ul>
<li>First item</li>
<li class="special">Second item</li>
</ul>
</div>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')

# Basic selectors
title_list = soup.select('#title')  # Returns list, use [0] for first element
title = title_list[0] if title_list else None
intro_list = soup.select('.intro')
intro = intro_list[0] if intro_list else None
all_paragraphs = soup.select('p')  # All p tags

# Attribute selectors
elements_with_class = soup.select('[class]')  # Has class attribute
special_items = soup.select('[class="special"]')  # Specific class value

# Descendant combinators
content_paragraphs = soup.select('div.content p')  # p descendants of div.content
direct_children = soup.select('div.content > p')  # p direct children of div.content

# Sibling combinators
after_intro = soup.select('p.intro + p')  # p immediately after p.intro
all_after_intro = soup.select('p.intro ~ p')  # all p siblings after p.intro

# Pseudo-classes
first_li = soup.select('li:first-child')
second_li = soup.select('li:nth-of-type(2)')
last_p = soup.select('p:last-of-type')

# Complex selectors
special_in_content = soup.select('div.content .special')
nested_selection = soup.select('div.container > div > ul > li.special')
```

### Directional Search Methods

Search in specific directions from the current element.

```python { .api }
def find_next(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the next element in document order matching criteria.

    Returns:
    PageElement or None
    """

def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all following elements in document order matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_previous(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the previous element in document order matching criteria.

    Returns:
    PageElement or None
    """

def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all preceding elements in document order matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the next sibling element matching criteria.

    Returns:
    PageElement or None
    """

def find_next_siblings(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all following sibling elements matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the previous sibling element matching criteria.

    Returns:
    PageElement or None
    """

def find_previous_siblings(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all preceding sibling elements matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_parent(self, name=None, attrs={}, **kwargs):
    """
    Find the parent element matching criteria.

    Returns:
    PageElement or None
    """

def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
    """
    Find all ancestor elements matching criteria.

    Returns:
    ResultSet of matching elements
    """
```

Usage Examples:

```python
html = '''
<article>
<h1>Title</h1>
<p>First paragraph</p>
<div class="sidebar">Sidebar content</div>
<p>Second paragraph</p>
<footer>Footer</footer>
</article>
'''

soup = BeautifulSoup(html, 'html.parser')
first_p = soup.find('p')

# Find next elements
next_div = first_p.find_next('div')  # sidebar div
next_p = first_p.find_next('p')  # second paragraph
all_following = first_p.find_all_next()  # all elements after first p

# Find previous elements
h1 = first_p.find_previous('h1')  # title
all_preceding = first_p.find_all_previous()  # tags before first p: h1, then article

# Find siblings
next_sibling_div = first_p.find_next_sibling('div')  # sidebar
all_next_siblings = first_p.find_next_siblings()  # div, p, footer

# Find parents
article = first_p.find_parent('article')
all_parents = first_p.find_parents()  # article, then document root
```

### Advanced Search Patterns

Complex search criteria using callables, regular expressions, and custom matching logic.

```python { .api }
# Search criteria types
SearchCriteria = Union[
    str,         # Exact match
    list,        # Match any item in list
    re.Pattern,  # Regex pattern match
    callable,    # Custom function returning bool
    True,        # Match any (for text: any non-empty string)
    None         # No filter (match all)
]
```

Usage Examples:

```python
import re
from bs4 import BeautifulSoup

html = '''
<div>
<p class="intro summary">Introduction</p>
<p class="content">Main content</p>
<a href="mailto:user@example.com">Email</a>
<a href="http://example.com">Website</a>
<span data-value="123">Data span</span>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')

# List matching - multiple values
paragraphs = soup.find_all('p', class_=['intro', 'content'])

# Regex matching
email_links = soup.find_all('a', href=re.compile(r'^mailto:'))
data_elements = soup.find_all(attrs={'data-value': re.compile(r'\d+')})

# Callable matching
def has_multiple_classes(tag):
    return tag.has_attr('class') and len(tag['class']) > 1

multi_class_elements = soup.find_all(has_multiple_classes)

def is_external_link(tag):
    return (tag.name == 'a' and
            tag.has_attr('href') and
            tag['href'].startswith('http'))

external_links = soup.find_all(is_external_link)

# Text content search
elements_with_text = soup.find_all(text=True)  # All text nodes
intro_text = soup.find_all(text=re.compile(r'Intro'))  # Text containing 'Intro'

# Complex combined criteria
def complex_criteria(tag):
    return (tag.name in ['p', 'div'] and
            tag.has_attr('class') and
            'content' in tag.get('class', []))

matching_elements = soup.find_all(complex_criteria)
```

### Search Utilities

Helper classes and functions for search operations.

```python { .api }
class SoupStrainer:
    """Encapsulates search criteria for filtering elements during parsing."""

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """
        Create search criteria for parsing or post-parse filtering.

        Parameters same as find() method
        """

    def search(self, markup):
        """Test if element matches criteria"""

    def search_tag(self, markup_name, markup_attrs):
        """Test if tag matches criteria"""

class ResultSet(list):
    """List subclass that tracks the search criteria used to generate results."""

    @property
    def source(self):
        """The SoupStrainer that generated these results"""
```

Usage Examples:

```python
from bs4 import BeautifulSoup, SoupStrainer

# Use SoupStrainer to limit parsing
only_links = SoupStrainer('a')
soup = BeautifulSoup(html, 'html.parser', parse_only=only_links)

# ResultSet provides search context
results = soup.find_all('p')
print(type(results))  # <class 'bs4.element.ResultSet'>
print(results.source)  # Shows the SoupStrainer used
```

### Backward Compatibility

Legacy search methods from BeautifulSoup 3.x.

```python { .api }
# BeautifulSoup 3.x compatibility
def findAll(self, *args, **kwargs):  # Use find_all instead
    """Deprecated: use find_all"""

def findNext(self, *args, **kwargs):  # Use find_next instead
    """Deprecated: use find_next"""
```