Python library for pulling data out of HTML and XML files with pluggable parser architecture and intuitive navigation API
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Navigate through the parse tree using parent-child relationships, sibling traversal, and document-order iteration. Beautiful Soup provides both direct property access and generator-based iteration for memory-efficient traversal of large documents.
Navigate up and down the parse tree hierarchy using parent and children relationships.
@property
def parent(self):
"""
The parent element of this element, or None if this is the root.
Returns:
PageElement or None
"""
@property
def contents(self):
"""
List of direct children of this element.
Returns:
list of PageElement instances
"""
@property
def children(self):
"""
Generator yielding direct children of this element.
Yields:
PageElement instances
"""
@property
def descendants(self):
"""
Generator yielding all descendant elements in document order.
Yields:
PageElement instances (tags and strings)
"""
@property
def parents(self):
"""
Generator yielding all parent elements up to the document root.
Yields:
PageElement instances
"""
Usage Examples:
from bs4 import BeautifulSoup
html = '''
<html>
<body>
<div class="container">
<p>First paragraph</p>
<p>Second paragraph</p>
</div>
</body>
</html>
'''
soup = BeautifulSoup(html, 'html.parser')
div = soup.find('div')
first_p = soup.find('p')
# Parent access
print(div.parent.name) # 'body'
print(first_p.parent.name) # 'div'
# Children access
print(len(div.contents)) # 5 (includes whitespace text nodes)
print([child.name for child in div.children if child.name]) # ['p', 'p']
# Descendants - all elements below (the starting element itself is not included)
for element in div.descendants:
    if hasattr(element, 'name') and element.name:
        print(element.name) # p, p
# Parents - up to root (the BeautifulSoup object itself is named '[document]')
for parent in first_p.parents:
    if parent.name:
        print(parent.name) # div, body, html, [document]

Navigate horizontally through elements at the same level in the parse tree.
@property
def next_sibling(self):
"""
The next sibling element, or None if this is the last child.
Returns:
PageElement or None
"""
@property
def previous_sibling(self):
"""
The previous sibling element, or None if this is the first child.
Returns:
PageElement or None
"""
@property
def next_siblings(self):
"""
Generator yielding all following sibling elements.
Yields:
PageElement instances
"""
@property
def previous_siblings(self):
"""
Generator yielding all preceding sibling elements in reverse order.
Yields:
PageElement instances
"""
Usage Examples:
html = '<div><p>One</p><p>Two</p><p>Three</p></div>'
soup = BeautifulSoup(html, 'html.parser')
first_p = soup.find('p')
second_p = first_p.next_sibling # No whitespace between the tags here, so the next sibling is the second <p>
third_p = soup.find_all('p')[2]
# Direct sibling access
print(second_p.previous_sibling.string) # 'One'
print(second_p.next_sibling.string) # 'Three'
# Iterate through siblings
for sibling in first_p.next_siblings:
    if hasattr(sibling, 'name') and sibling.name == 'p':
        print(sibling.string) # 'Two', 'Three'
for sibling in third_p.previous_siblings:
    if hasattr(sibling, 'name') and sibling.name == 'p':
        print(sibling.string) # 'Two', 'One'

Navigate through elements in the order they appear in the source document.
@property
def next_element(self):
"""
The next element in document order, or None if this is the last.
Returns:
PageElement or None
"""
@property
def previous_element(self):
"""
The previous element in document order, or None if this is the first.
Returns:
PageElement or None
"""
@property
def next_elements(self):
"""
Generator yielding all following elements in document order.
Yields:
PageElement instances (tags and strings)
"""
@property
def previous_elements(self):
"""
Generator yielding all preceding elements in reverse document order.
Yields:
PageElement instances (tags and strings)
"""
Usage Examples:
html = '<div><p>Para <em>emphasis</em> text</p><span>After</span></div>'
soup = BeautifulSoup(html, 'html.parser')
p_tag = soup.find('p')
em_tag = soup.find('em')
# Document order navigation
current = p_tag
while current:
    if hasattr(current, 'name') and current.name:
        print(f"Tag: {current.name}")
    elif isinstance(current, str) and current.strip():
        print(f"Text: {current.strip()}")
    current = current.next_element
# Output: Tag: p, Text: Para, Tag: em, Text: emphasis, Text: text, Tag: span, Text: After
# Collect text in document order from a starting point, stopping at the next <span>
text_content = []
for element in em_tag.next_elements:
    if isinstance(element, str) and element.strip():
        text_content.append(element.strip())
    if hasattr(element, 'name') and element.name == 'span':
        break
print(text_content) # ['emphasis', 'text'] (next_elements starts with em's own string; the loop stops before 'After')

Helper methods for common navigation patterns.
def index(self, element):
"""
Get the index of a child element.
Parameters:
- element: PageElement to find
Returns:
int, index of element in contents list
Raises:
ValueError if element is not a child
"""
@property
def is_empty_element(self):
"""
True if this tag has no contents and can be rendered as self-closing.
Returns:
bool
"""
Usage Examples:
html = '<ul><li>First</li><li>Second</li><li>Third</li></ul>'
soup = BeautifulSoup(html, 'html.parser')
ul = soup.find('ul')
second_li = soup.find_all('li')[1]
# Get child index
print(ul.index(second_li)) # 1 (zero-based position of the second <li> in ul.contents)
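# Check if element is empty (the tag must have no contents AND be allowed to self-close,
# which for HTML means a void element such as <br>)
empty_br = soup.new_tag('br')
print(empty_br.is_empty_element) # True
empty_div = soup.new_tag('div')
print(empty_div.is_empty_element) # False - <div> has no contents but is not a void element
div_with_content = soup.new_tag('div')
div_with_content.string = 'Content'
print(div_with_content.is_empty_element) # False

Legacy navigation methods from BeautifulSoup 3.x are aliased for compatibility.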
# BeautifulSoup 3.x compatibility aliases
@property
def nextSibling(self): # Use next_sibling instead
"""Deprecated: use next_sibling"""
@property
def previousSibling(self): # Use previous_sibling instead
"""Deprecated: use previous_sibling"""
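def findNextSibling(self, name=None, attrs={}, string=None, **kwargs): # Method alias (not a property); use find_next_sibling instead
"""Deprecated: use find_next_sibling"""
def findPreviousSibling(self, name=None, attrs={}, string=None, **kwargs): # Method alias (not a property); use find_previous_sibling instead
"""Deprecated: use find_previous_sibling"""

Install with Tessl CLI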
npx tessl i tessl/pypi-beautifulsoup4