A comprehensive BibTeX parser library for Python 3 that enables parsing and writing of bibliographic data files
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Configurable parser with extensive options for handling various BibTeX formats, non-standard entries, field processing, and customization hooks. The BibTexParser class provides fine-grained control over the parsing process.
The BibTexParser class provides comprehensive configuration options for customizing the parsing behavior to handle different BibTeX variants and requirements.
class BibTexParser:
"""
A parser for reading BibTeX bibliographic data files.
Provides extensive configuration options for customizing parsing behavior
including entry filtering, field processing, string handling, and
cross-reference resolution.
"""
def __init__(
self,
customization=None,
ignore_nonstandard_types: bool = True,
homogenize_fields: bool = False,
interpolate_strings: bool = True,
common_strings: bool = True,
add_missing_from_crossref: bool = False
):
"""
Create a configurable BibTeX parser.
Parameters:
- customization (callable, optional): Function to process entries after parsing
- ignore_nonstandard_types (bool): If True, ignore non-standard entry types
- homogenize_fields (bool): If True, normalize field names (e.g., 'url' to 'link')
- interpolate_strings (bool): If True, replace string references with values
- common_strings (bool): If True, include common month abbreviations
- add_missing_from_crossref (bool): If True, resolve crossref dependencies
Returns:
BibTexParser: Configured parser instance
"""

Parse BibTeX data from strings with full configuration control and error handling options.
def parse(self, bibtex_str: str, partial: bool = False) -> BibDatabase:
"""
Parse a BibTeX string into a BibDatabase object.
Parameters:
- bibtex_str (str): BibTeX string to parse
- partial (bool): If True, continue parsing on errors; if False, raise exceptions
Returns:
BibDatabase: Parsed bibliographic database
Raises:
ParseException: If parsing fails and partial=False
"""

Parse BibTeX data from file objects with the same configuration and error handling as string parsing.
def parse_file(self, file, partial: bool = False) -> BibDatabase:
"""
Parse a BibTeX file into a BibDatabase object.
Parameters:
- file (file): File object to parse
- partial (bool): If True, continue parsing on errors; if False, raise exceptions
Returns:
BibDatabase: Parsed bibliographic database
Raises:
ParseException: If parsing fails and partial=False
"""

Module-level convenience function for quick parsing with custom configuration.
def parse(data: str, *args, **kwargs) -> BibDatabase:
"""
Convenience function for parsing BibTeX data.
Creates a BibTexParser with the provided arguments and parses the data.
Parameters:
- data (str): BibTeX string to parse
- *args, **kwargs: Arguments passed to BibTexParser constructor
Returns:
BibDatabase: Parsed bibliographic database
"""

Control how the parser handles different BibTeX entry types:
from bibtexparser.bparser import BibTexParser
# Allow non-standard entry types (like @software, @dataset)
parser = BibTexParser(ignore_nonstandard_types=False)
# Only accept standard BibTeX types (article, book, etc.)
parser = BibTexParser(ignore_nonstandard_types=True) # Default

Configure how fields are processed and normalized:
# Homogenize field names (e.g., 'url' -> 'link', 'keywords' -> 'keyword')
parser = BibTexParser(homogenize_fields=True)
# Keep original field names
parser = BibTexParser(homogenize_fields=False) # Default

Control how BibTeX string definitions are processed:
# Replace string references with their values
parser = BibTexParser(interpolate_strings=True) # Default
# Keep string structure for later processing
parser = BibTexParser(interpolate_strings=False)
# Include common month abbreviations (jan, feb, etc.)
parser = BibTexParser(common_strings=True) # Default
# Don't include common strings
parser = BibTexParser(common_strings=False)

Enable automatic resolution of crossref dependencies:
# Resolve crossref fields and merge referenced entries
parser = BibTexParser(add_missing_from_crossref=True)
# Keep crossref fields as-is
parser = BibTexParser(add_missing_from_crossref=False)  # Default

# Importing the submodule binds the top-level ``bibtexparser`` name that
# customize_entries() refers to; importing only BibTexParser from
# bibtexparser.bparser does not bind it.
import bibtexparser.customization
from bibtexparser.bparser import BibTexParser


def customize_entries(record):
    """
    Post-process a single parsed entry.
    Passed to BibTexParser through the ``customization`` argument below.
    Parameters:
    - record: One parsed entry; must support ``'author' in record``
      (presumably a dict of field name -> value; confirm against the
      library documentation)
    Returns:
    The record as returned by the customization helpers
    """
    # Convert author names to "Last, First" format
    if 'author' in record:
        # Apply author processing
        record = bibtexparser.customization.author(record)
    # Convert LaTeX to Unicode
    record = bibtexparser.customization.convert_to_unicode(record)
    return record


parser = BibTexParser(customization=customize_entries)
with open('bibliography.bib') as bibtex_file:
    bib_database = parser.parse_file(bibtex_file)

from bibtexparser.bparser import BibTexParser
# Configure parser for maximum compatibility
parser = BibTexParser(
    ignore_nonstandard_types=False,  # Accept all entry types
    homogenize_fields=True,          # Normalize field names
    common_strings=True,             # Include month abbreviations
    add_missing_from_crossref=True   # Resolve crossrefs
)

# The whole read-and-parse step is guarded so that a failure is reported
# instead of aborting the script; the broad catch is kept from the original
# example and is only appropriate at a script's top level.
try:
    with open('messy_bibliography.bib') as bibtex_file:
        # Use partial=True to continue parsing on errors
        bib_database = parser.parse_file(bibtex_file, partial=True)
    print(f"Parsed {len(bib_database.entries)} entries")
except Exception as e:
    print(f"Parsing failed: {e}")

from bibtexparser.bparser import BibTexParser
# Create parser that can be used multiple times
parser = BibTexParser()
parser.expect_multiple_parse = True  # Disable warning

# Parse multiple files into the same database
for filename in ['refs1.bib', 'refs2.bib', 'refs3.bib']:
    with open(filename) as bibtex_file:
        bib_database = parser.parse_file(bibtex_file)

# Report once, after every file has been parsed, reading the count from
# the parser's own bib_database attribute.
print(f"Total entries: {len(parser.bib_database.entries)}")

# Configuration for strict academic BibTeX
academic_parser = BibTexParser(
ignore_nonstandard_types=True,
homogenize_fields=False,
interpolate_strings=True,
common_strings=True
)
# Configuration for modern/extended BibTeX
modern_parser = BibTexParser(
ignore_nonstandard_types=False, # Allow @software, @online, etc.
homogenize_fields=True, # Normalize field names
interpolate_strings=True,
common_strings=True,
add_missing_from_crossref=True # Handle complex references
)

Install with Tessl CLI
npx tessl i tessl/pypi-bibtexparser