# Element Search

Find elements using tag names, attributes, text content, CSS selectors, and custom matching functions. Beautiful Soup provides flexible search methods that return either the first match or all matches, and each accepts several criteria types for precise element selection.

## Capabilities

### Basic Search Methods

Find elements in the parse tree using tag names, attributes, and text content.

```python { .api }
def find(self, name=None, attrs={}, recursive=True, text=None, **kwargs):
    """
    Find the first element matching the given criteria.

    Parameters:
    - name: str, list, regex, callable, or True - tag name filter
    - attrs: dict - attribute filters
    - recursive: bool - search descendants (True) or direct children only (False)
    - text: str, list, regex, callable, or True - text content filter
    - **kwargs: attribute filters as keyword arguments

    Returns:
    PageElement or None if no match found
    """

def find_all(self, name=None, attrs={}, recursive=True, text=None, limit=None, **kwargs):
    """
    Find all elements matching the given criteria.

    Parameters:
    - name: str, list, regex, callable, or True - tag name filter
    - attrs: dict - attribute filters
    - recursive: bool - search descendants (True) or direct children only (False)
    - text: str, list, regex, callable, or True - text content filter
    - limit: int - maximum number of results to return
    - **kwargs: attribute filters as keyword arguments

    Returns:
    ResultSet (list-like) of matching PageElement instances
    """

# Convenience method - equivalent to find_all
def __call__(self, *args, **kwargs):
    """Equivalent to find_all(*args, **kwargs)"""
```

Usage Examples:

```python
from bs4 import BeautifulSoup
import re

html = '''
<html>
  <body>
    <div class="container">
      <p id="intro">Introduction text</p>
      <p class="content">Main content</p>
      <a href="http://example.com">External link</a>
      <a href="/internal">Internal link</a>
    </div>
  </body>
</html>
'''

soup = BeautifulSoup(html, 'html.parser')

# Find by tag name
first_p = soup.find('p')
all_ps = soup.find_all('p')

# Find by attributes
intro = soup.find('p', id='intro')
content = soup.find('p', class_='content')
external_link = soup.find('a', href='http://example.com')

# Find with attribute dictionary
intro = soup.find('p', attrs={'id': 'intro'})

# Find by multiple attributes
# (no results in this example, but shows syntax)
result = soup.find('p', {'class': 'content', 'id': 'special'})

# Find with regex patterns
external_links = soup.find_all('a', href=re.compile(r'^http'))
internal_links = soup.find_all('a', href=re.compile(r'^/'))

# Find with callable
def has_class(tag):
    return tag.has_attr('class')

elements_with_class = soup.find_all(has_class)

# Limit results
first_two_links = soup.find_all('a', limit=2)

# Search direct children only
container = soup.find('div', class_='container')
direct_children = container.find_all('p', recursive=False)
```
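
Because `__call__` simply delegates to `find_all`, a soup or tag object can also be called directly. A brief illustration, reusing the `soup` from the example above:

```python
# Calling the object is shorthand for find_all()
all_ps = soup('p')               # same as soup.find_all('p')
first_link = soup('a', limit=1)  # arguments are forwarded to find_all
```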

### CSS Selector Search

Use CSS selector syntax for complex element selection.

```python { .api }
def select(self, selector):
    """
    Find all elements matching a CSS selector.

    Parameters:
    - selector: str - CSS selector string

    Returns:
    ResultSet of matching elements

    Supported selectors:
    - Tag names: 'p', 'div'
    - IDs: '#myid'
    - Classes: '.myclass'
    - Attributes: '[href]', '[href="value"]'
    - Pseudo-classes: ':first-child', ':nth-of-type(n)'
    - Combinators: 'div > p', 'div p', 'div + p', 'div ~ p'
    """

# Note: the select_one() method was added in later versions of Beautiful Soup.
# For version 4.3.2, use select(selector)[0] or select(selector)[:1] for the first match.
```

Usage Examples:

```python
html = '''
<div class="container">
  <h1 id="title">Page Title</h1>
  <div class="content">
    <p class="intro">Introduction</p>
    <p>Regular paragraph</p>
    <ul>
      <li>First item</li>
      <li class="special">Second item</li>
    </ul>
  </div>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')

# Basic selectors
title_list = soup.select('#title')  # Returns list, use [0] for first element
title = title_list[0] if title_list else None
intro_list = soup.select('.intro')
intro = intro_list[0] if intro_list else None
all_paragraphs = soup.select('p')  # All p tags

# Attribute selectors
elements_with_class = soup.select('[class]')        # Has class attribute
special_items = soup.select('[class="special"]')    # Specific class value

# Descendant combinators
content_paragraphs = soup.select('div.content p')   # p descendants of div.content
direct_children = soup.select('div.content > p')    # p direct children of div.content

# Sibling combinators
after_intro = soup.select('p.intro + p')      # p immediately after p.intro
all_after_intro = soup.select('p.intro ~ p')  # all p siblings after p.intro

# Pseudo-classes
first_li = soup.select('li:first-child')
second_li = soup.select('li:nth-of-type(2)')
last_p = soup.select('p:last-of-type')

# Complex selectors
special_in_content = soup.select('div.content .special')
nested_selection = soup.select('div.container > div > ul > li.special')
```
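
Because `select_one()` is not available in this version, a tiny wrapper can encapsulate the `select(selector)[0]` pattern shown above. This is a sketch; `select_first` is a hypothetical helper name, not part of the library:

```python
def select_first(element, selector):
    """Return the first element matching the CSS selector, or None."""
    matches = element.select(selector)
    return matches[0] if matches else None

title = select_first(soup, '#title')           # first match
missing = select_first(soup, 'p.nonexistent')  # None instead of an IndexError
```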

### Directional Search Methods

Search in specific directions from the current element.

```python { .api }
def find_next(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the next element in document order matching criteria.

    Returns:
    PageElement or None
    """

def find_all_next(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all following elements in document order matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_previous(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the previous element in document order matching criteria.

    Returns:
    PageElement or None
    """

def find_all_previous(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all preceding elements in document order matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_next_sibling(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the next sibling element matching criteria.

    Returns:
    PageElement or None
    """

def find_next_siblings(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all following sibling elements matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_previous_sibling(self, name=None, attrs={}, text=None, **kwargs):
    """
    Find the previous sibling element matching criteria.

    Returns:
    PageElement or None
    """

def find_previous_siblings(self, name=None, attrs={}, text=None, limit=None, **kwargs):
    """
    Find all preceding sibling elements matching criteria.

    Returns:
    ResultSet of matching elements
    """

def find_parent(self, name=None, attrs={}, **kwargs):
    """
    Find the parent element matching criteria.

    Returns:
    PageElement or None
    """

def find_parents(self, name=None, attrs={}, limit=None, **kwargs):
    """
    Find all ancestor elements matching criteria.

    Returns:
    ResultSet of matching elements
    """
```

Usage Examples:

```python
html = '''
<article>
  <h1>Title</h1>
  <p>First paragraph</p>
  <div class="sidebar">Sidebar content</div>
  <p>Second paragraph</p>
  <footer>Footer</footer>
</article>
'''

soup = BeautifulSoup(html, 'html.parser')
first_p = soup.find('p')

# Find next elements
next_div = first_p.find_next('div')       # sidebar div
next_p = first_p.find_next('p')           # second paragraph
all_following = first_p.find_all_next()   # all elements after first p

# Find previous elements
h1 = first_p.find_previous('h1')               # title
all_preceding = first_p.find_all_previous()    # everything before the first p

# Find siblings
next_sibling_div = first_p.find_next_sibling('div')  # sidebar
all_next_siblings = first_p.find_next_siblings()     # div, p, footer

# Find parents
article = first_p.find_parent('article')
all_parents = first_p.find_parents()  # article, then document root
```
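
The directional methods accept the same name, attribute, and text criteria as `find()` and `find_all()`. A short sketch, reusing `first_p` from the example above:

```python
import re

sidebar = first_p.find_next_sibling('div', class_='sidebar')  # attribute filter
second_text = first_p.find_next(text=re.compile(r'Second'))   # text filter (returns a NavigableString)
next_two = first_p.find_all_next(['p', 'footer'], limit=2)    # list filter with limit
```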

### Advanced Search Patterns

Complex search criteria using callables, regular expressions, and custom matching logic.

```python { .api }
# Search criteria types
SearchCriteria = Union[
    str,         # Exact match
    list,        # Match any item in list
    re.Pattern,  # Regex pattern match
    callable,    # Custom function returning bool
    True,        # Match any (for text: any non-empty string)
    None         # No filter (match all)
]
```

Usage Examples:

```python
import re
from bs4 import BeautifulSoup

html = '''
<div>
  <p class="intro summary">Introduction</p>
  <p class="content">Main content</p>
  <a href="mailto:user@example.com">Email</a>
  <a href="http://example.com">Website</a>
  <span data-value="123">Data span</span>
</div>
'''

soup = BeautifulSoup(html, 'html.parser')

# List matching - multiple values
paragraphs = soup.find_all('p', class_=['intro', 'content'])

# Regex matching
email_links = soup.find_all('a', href=re.compile(r'^mailto:'))
data_elements = soup.find_all(attrs={'data-value': re.compile(r'\d+')})

# Callable matching
def has_multiple_classes(tag):
    return tag.has_attr('class') and len(tag['class']) > 1

multi_class_elements = soup.find_all(has_multiple_classes)

def is_external_link(tag):
    return (tag.name == 'a' and
            tag.has_attr('href') and
            tag['href'].startswith('http'))

external_links = soup.find_all(is_external_link)

# Text content search
elements_with_text = soup.find_all(text=True)           # All text nodes
intro_text = soup.find_all(text=re.compile(r'Intro'))   # Text containing 'Intro'

# Complex combined criteria
def complex_criteria(tag):
    return (tag.name in ['p', 'div'] and
            tag.has_attr('class') and
            'content' in tag.get('class', []))

matching_elements = soup.find_all(complex_criteria)
```
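
The `True` and `None` criteria listed in the API block above are not demonstrated explicitly; a brief sketch using the same `soup`:

```python
all_tags = soup.find_all(True)             # every tag, but no text nodes
with_href = soup.find_all(href=True)       # any element that has an href attribute
all_links = soup.find_all('a', href=True)  # combine a name filter with True
```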

### Search Utilities

Helper classes and functions for search operations.

```python { .api }
class SoupStrainer:
    """Encapsulates search criteria for filtering elements during parsing."""

    def __init__(self, name=None, attrs={}, text=None, **kwargs):
        """
        Create search criteria for parsing or post-parse filtering.

        Parameters same as find() method
        """

    def search(self, markup):
        """Test if element matches criteria"""

    def search_tag(self, markup_name, markup_attrs):
        """Test if tag matches criteria"""

class ResultSet(list):
    """List subclass that tracks the search criteria used to generate results."""

    @property
    def source(self):
        """The SoupStrainer that generated these results"""
```

Usage Examples:

```python
from bs4 import BeautifulSoup, SoupStrainer

# Sample markup for illustration
html = '<div><a href="/one">One</a><p>Text</p><a href="/two">Two</a></div>'

# Use SoupStrainer to limit parsing to <a> tags
only_links = SoupStrainer('a')
soup = BeautifulSoup(html, 'html.parser', parse_only=only_links)

# ResultSet provides search context
results = soup.find_all('a')
print(type(results))   # <class 'bs4.element.ResultSet'>
print(results.source)  # Shows the SoupStrainer used for this search
```
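
`SoupStrainer` accepts the same criteria as `find()`, so parsing can also be restricted by attributes. A sketch, assuming the same `html` string as above:

```python
import re

# Keep only links whose href starts with "/" while parsing
internal_only = SoupStrainer('a', href=re.compile(r'^/'))
soup = BeautifulSoup(html, 'html.parser', parse_only=internal_only)
print([a['href'] for a in soup.find_all('a')])  # only matching links survive parsing
```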

### Backward Compatibility

Legacy search methods from BeautifulSoup 3.x.

```python { .api }
# BeautifulSoup 3.x compatibility
def findAll(self, *args, **kwargs):  # Use find_all instead
    """Deprecated: use find_all"""

def findNext(self, *args, **kwargs):  # Use find_next instead
    """Deprecated: use find_next"""
```
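
A brief sketch confirming that the legacy camelCase names behave the same as their snake_case equivalents:

```python
from bs4 import BeautifulSoup

soup = BeautifulSoup('<p>One</p><a href="/x">Link</a>', 'html.parser')

# The camelCase aliases return the same results as the snake_case methods
assert soup.findAll('a') == soup.find_all('a')
assert soup.find('p').findNext('a') is soup.find('p').find_next('a')
```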