A comprehensive BibTeX parser library for Python 3 that enables parsing and writing of bibliographic data files
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Low-level parsing control through the BibtexExpression class that provides access to the underlying pyparsing grammar. This module enables advanced customization of parsing behavior, parse actions, and error handling for specialized BibTeX processing needs.
The core BibtexExpression class provides access to the pyparsing-based grammar that powers the BibTeX parser, enabling low-level customization and control.
class BibtexExpression:
"""
Low-level BibTeX parsing expression using pyparsing grammar.
Provides access to the underlying parsing components and allows
for advanced customization of parsing behavior through parse actions
and grammar modifications.
"""
def __init__(self):
"""Create a new BibtexExpression parser instance."""
def parseFile(self, file_obj):
"""
Parse a BibTeX file using the expression grammar.
Parameters:
- file_obj: File object to parse
Returns:
Parsed result from pyparsing
"""
def add_log_function(self, log_fun):
"""
Add logging function for parsing events.
Parameters:
- log_fun (callable): Function to call for logging parse events
"""
def set_string_name_parse_action(self, fun):
"""
Set parse action for string name processing.
Parameters:
- fun (callable): Function to process string names during parsing
"""

Access to individual grammar components for fine-grained parsing control and customization.
# Grammar component attributes available on BibtexExpression instances:
# entry: pyparsing.ParserElement
# Grammar for parsing BibTeX entries (@article, @book, etc.)
# explicit_comment: pyparsing.ParserElement
# Grammar for parsing explicit @comment entries
# implicit_comment: pyparsing.ParserElement
# Grammar for parsing implicit comments (text outside entries)
# string_def: pyparsing.ParserElement
# Grammar for parsing @string definitions
# preamble_decl: pyparsing.ParserElement
# Grammar for parsing @preamble declarations
# main_expression: pyparsing.ParserElement
# Main grammar expression combining all BibTeX components
# ParseException: Exception
# Exception class for parsing errors (from pyparsing)

Utility functions for processing parsed content and manipulating parsing behavior.
def strip_after_new_lines(s: str) -> str:
"""
Strip whitespace from continuation lines in multi-line strings.
Parameters:
- s (str): Input string with potential continuation lines
Returns:
str: String with cleaned continuation lines
"""
def add_logger_parse_action(expr, log_func):
"""
Add logging parse action to a pyparsing expression.
Parameters:
- expr: pyparsing expression to add logging to
- log_func (callable): Function to call for logging
Returns:
Modified pyparsing expression with logging
"""

from bibtexparser.bibtexexpression import BibtexExpression
# Create expression parser
expr = BibtexExpression()
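# Add custom logging
# NOTE: verify the callback argument against your installed bibtexparser version;
# in the 1.x releases the log function appears to receive a formatted message
# string (e.g. "Found <name>, matching <text>") rather than the parse tokens.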
def log_entries(tokens):
print(f"Parsed entry: {tokens[0].get('ID', 'unknown')}")
expr.add_log_function(log_entries)
# Parse with custom actions
with open('bibliography.bib') as f:
result = expr.parseFile(f)

from bibtexparser.bibtexexpression import BibtexExpression
from bibtexparser.bparser import BibTexParser
# Create custom parser with expression control
expr = BibtexExpression()
# Customize string name processing
def process_string_names(tokens):
# Custom processing of @string names
return tokens[0].lower()
expr.set_string_name_parse_action(process_string_names)
# Use with main parser
parser = BibTexParser()
# Note: BibtexExpression is used internally by BibTexParser
# This example shows the conceptual usage

from bibtexparser.bibtexexpression import BibtexExpression
expr = BibtexExpression()
try:
with open('malformed.bib') as f:
result = expr.parseFile(f)
except expr.ParseException as e:
print(f"Parse error at line {e.lineno}: {e.msg}")
print(f"Context: {e.line}")

from bibtexparser.bibtexexpression import BibtexExpression
expr = BibtexExpression()
# Access specific grammar components
entry_grammar = expr.entry
comment_grammar = expr.explicit_comment
string_grammar = expr.string_def
# Use individual components for specialized parsing
test_string = "@string{jan = \"January\"}"
try:
result = string_grammar.parseString(test_string)
print(f"Parsed string definition: {result}")
except expr.ParseException as e:
print(f"Failed to parse string: {e}")

The BibtexExpression class is used internally by BibTexParser but can be accessed for advanced customization:
from bibtexparser.bparser import BibTexParser
from bibtexparser.bibtexexpression import BibtexExpression
# Create parser with custom expression handling
parser = BibTexParser()
# The parser uses BibtexExpression internally
# Advanced users can subclass BibTexParser to access and modify
# the underlying expression grammar for specialized needs
class CustomBibTexParser(BibTexParser):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
# Access and customize the internal expression parser
# Note: This requires understanding of the internal implementation

Install with Tessl CLI
npx tessl i tessl/pypi-bibtexparser@1.4.2