CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/pypi-beautifulsoup4

Python library for pulling data out of HTML and XML files with pluggable parser architecture and intuitive navigation API

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

navigation.mddocs/

Tree Navigation

Navigate through the parse tree using parent-child relationships, sibling traversal, and document-order iteration. Beautiful Soup provides both direct property access and generator-based iteration for memory-efficient traversal of large documents.

Capabilities

Parent-Child Navigation

Navigate up and down the parse tree hierarchy using parent and children relationships.

@property
def parent(self):
    """
    The element that directly contains this element.

    The parent of a top-level tag such as <html> is the BeautifulSoup
    object itself; the BeautifulSoup object's own parent is None.

    Returns:
    PageElement or None
    """

@property  
def contents(self):
    """
    List of direct children of this element, in document order.

    Text between tags (including whitespace-only text) appears in the
    list as string nodes alongside the child tags, so the length is
    usually larger than the number of child tags.

    Returns:
    list of PageElement instances
    """

@property
def children(self):
    """
    Iterate over the direct children of this element.

    Yields the same items, in the same order, as `contents`; nested
    descendants are not visited.

    Yields:
    PageElement instances
    """

@property
def descendants(self):
    """
    Generator yielding every element nested below this one, in
    document order: its children, their children, and so on.

    The element itself is not yielded.

    Yields:
    PageElement instances (tags and strings)
    """

@property
def parents(self):
    """
    Generator yielding each ancestor of this element, starting with its
    immediate parent and working outwards.

    The last item yielded is the BeautifulSoup object, whose name is
    '[document]'.

    Yields:
    PageElement instances
    """

Usage Examples:

from bs4 import BeautifulSoup

html = '''
<html>
  <body>
    <div class="container">
      <p>First paragraph</p>
      <p>Second paragraph</p>
    </div>
  </body>
</html>
'''

soup = BeautifulSoup(html, 'html.parser')
div = soup.find('div')
first_p = soup.find('p')

# Parent access
print(div.parent.name)  # 'body'
print(first_p.parent.name)  # 'div'

# Children access
print(len(div.contents))  # 5 (includes whitespace text nodes)
print([child.name for child in div.children if child.name])  # ['p', 'p']

# Descendants - everything below div; div itself is not included
for element in div.descendants:
    if hasattr(element, 'name') and element.name:
        print(element.name)  # p, p

# Parents - up to and including the BeautifulSoup object
for parent in first_p.parents:
    if parent.name:
        print(parent.name)  # div, body, html, [document]

Sibling Navigation

Navigate horizontally through elements at the same level in the parse tree.

@property
def next_sibling(self):
    """
    The element that follows this one under the same parent, or None
    if this is the last child.

    Text between two tags (including whitespace) is a sibling too, so
    the next sibling of a tag may be a string rather than the next tag.

    Returns:
    PageElement or None
    """

@property
def previous_sibling(self):
    """
    The element that precedes this one under the same parent, or None
    if this is the first child.

    Text between two tags (including whitespace) is a sibling too, so
    the previous sibling of a tag may be a string rather than a tag.

    Returns:
    PageElement or None
    """

@property
def next_siblings(self):
    """
    Generator yielding the siblings that follow this element, nearest
    first.

    Both tags and string nodes are yielded; the element itself is not.

    Yields:
    PageElement instances
    """

@property
def previous_siblings(self):
    """
    Generator yielding the siblings that precede this element, nearest
    first (that is, in reverse document order).

    Both tags and string nodes are yielded; the element itself is not.

    Yields:
    PageElement instances
    """

Usage Examples:

html = '<div><p>One</p><p>Two</p><p>Three</p></div>'
soup = BeautifulSoup(html, 'html.parser')

first_p = soup.find('p')
# This markup has no whitespace between the <p> tags, so each tag's
# sibling is the adjacent <p>. If the tags were separated by newlines or
# spaces, a text node would sit between them and you would need to step
# over it (e.g. first_p.next_sibling.next_sibling).
second_p = first_p.next_sibling
third_p = soup.find_all('p')[2]

# Direct sibling access
print(second_p.previous_sibling.string)  # 'One'
print(second_p.next_sibling.string)  # 'Three'

# Iterate through siblings
for sibling in first_p.next_siblings:
    if hasattr(sibling, 'name') and sibling.name == 'p':
        print(sibling.string)  # 'Two', 'Three'

# previous_siblings walks backwards, nearest sibling first
for sibling in third_p.previous_siblings:
    if hasattr(sibling, 'name') and sibling.name == 'p':
        print(sibling.string)  # 'Two', 'One'

Document-Order Navigation

Navigate through elements in the order they appear in the source document.

@property
def next_element(self):
    """
    The element that was parsed immediately after this one, or None if
    this is the last element in the document.

    This is not the same as next_sibling: for a tag that has children,
    the next element is its first child.

    Returns:
    PageElement or None
    """

@property
def previous_element(self):
    """
    The element that was parsed immediately before this one, or None
    if this is the first element in the document.

    This is not the same as previous_sibling: for the first child of a
    tag, the previous element is that enclosing tag.

    Returns:
    PageElement or None
    """

@property
def next_elements(self):
    """
    Generator yielding every element parsed after this one, in
    document order.

    Iteration starts with this element's own children (if it has any)
    and then continues past its closing tag to the end of the document.

    Yields:
    PageElement instances (tags and strings)
    """

@property
def previous_elements(self):
    """
    Generator yielding every element parsed before this one, working
    backwards from the nearest to the start of the document.

    Yields:
    PageElement instances (tags and strings)
    """

Usage Examples:

html = '<div><p>Para <em>emphasis</em> text</p><span>After</span></div>'
soup = BeautifulSoup(html, 'html.parser')

p_tag = soup.find('p')
em_tag = soup.find('em')

# Document order navigation: follow next_element until it runs out (None)
current = p_tag
while current:
    if hasattr(current, 'name') and current.name:
        print(f"Tag: {current.name}")
    elif isinstance(current, str) and current.strip():
        print(f"Text: {current.strip()}")
    current = current.next_element

# Output: Tag: p, Text: Para, Tag: em, Text: emphasis, Text: text, Tag: span, Text: After

# Collect text in document order from a starting point, stopping at <span>.
# next_elements begins with em_tag's own child string ('emphasis'), and the
# loop breaks on the <span> tag before its text ('After') is reached.
text_content = []
for element in em_tag.next_elements:
    if isinstance(element, str) and element.strip():
        text_content.append(element.strip())
    if hasattr(element, 'name') and element.name == 'span':
        break
print(text_content)  # ['emphasis', 'text']

Navigation Utilities

Helper methods for common navigation patterns.

def index(self, element):
    """
    Get the position of a direct child within this tag's contents.

    The child is located by identity (the very same object), not by
    equality, so two children with identical markup are still
    distinguished from one another.

    Parameters:
    - element: PageElement to find

    Returns:
    int, index of element in contents list

    Raises:
    ValueError if element is not a child
    """

@property
def is_empty_element(self):
    """
    True if this tag has no contents and can be rendered as self-closing.

    Both conditions must hold. With an HTML parser only void elements
    such as <br> or <img> can be self-closing, so an empty <div> is
    not an empty element even though it has no contents.

    Returns:
    bool
    """

Usage Examples:

html = '<ul><li>First</li><li>Second</li><li>Third</li></ul>'
soup = BeautifulSoup(html, 'html.parser')

ul = soup.find('ul')
second_li = soup.find_all('li')[1]

# Get child index
print(ul.index(second_li))  # 1

# Check if element is empty: the tag must have no contents AND be a
# void element that the parser allows to be self-closing.
empty_br = soup.new_tag('br')
print(empty_br.is_empty_element)  # True

# A <div> with no contents is still not an "empty element" in HTML,
# because <div> is not a void element.
empty_div = soup.new_tag('div')
print(empty_div.is_empty_element)  # False

div_with_content = soup.new_tag('div')
div_with_content.string = 'Content'
print(div_with_content.is_empty_element)  # False

Backward Compatibility

Legacy camelCase names from BeautifulSoup 3.x remain available as deprecated aliases of the snake_case API; prefer the snake_case names in new code.

# BeautifulSoup 3.x compatibility aliases
@property
def nextSibling(self):  # Use next_sibling instead
    """
    Deprecated BeautifulSoup 3.x alias for the next_sibling property.

    Returns:
    PageElement or None
    """

@property  
def previousSibling(self):  # Use previous_sibling instead
    """
    Deprecated BeautifulSoup 3.x alias for the previous_sibling property.

    Returns:
    PageElement or None
    """

def findNextSibling(self, *args, **kwargs):  # Use find_next_sibling instead
    """
    Deprecated BeautifulSoup 3.x alias for the find_next_sibling() method.

    This is a callable method, not a property: all arguments are
    passed through to find_next_sibling().

    Returns:
    PageElement or None
    """

def findPreviousSibling(self, *args, **kwargs):  # Use find_previous_sibling instead
    """
    Deprecated BeautifulSoup 3.x alias for the find_previous_sibling() method.

    This is a callable method, not a property: all arguments are
    passed through to find_previous_sibling().

    Returns:
    PageElement or None
    """

Install with Tessl CLI

npx tessl i tessl/pypi-beautifulsoup4

docs

content.md

index.md

modification.md

navigation.md

output.md

parsing.md

search.md

tile.json