A comprehensive BibTeX parser library for Python 3 that enables parsing and writing of bibliographic data files
npx @tessl/cli install tessl/pypi-bibtexparser@1.4.0

A comprehensive BibTeX parser library for Python 3 that enables developers to parse and manipulate bibliographic data files. It provides a simple JSON-like API for loading BibTeX files into BibDatabase objects, supports parsing from both strings and files, and can write data back out in BibTeX format.
pip install bibtexparser

import bibtexparser

For advanced usage with custom parsers and writers:
from bibtexparser.bparser import BibTexParser
from bibtexparser.bwriter import BibTexWriter
from bibtexparser.bibdatabase import BibDatabase

import bibtexparser
# Parse a BibTeX file
with open('bibliography.bib') as bibtex_file:
bib_database = bibtexparser.load(bibtex_file)
# Access entries
for entry in bib_database.entries:
print(f"{entry['ID']}: {entry.get('title', 'No title')}")
# Parse from string
bibtex_str = """
@article{Einstein1905,
title={On the electrodynamics of moving bodies},
author={Einstein, Albert},
journal={Annalen der Physik},
year={1905}
}
"""
bib_database = bibtexparser.loads(bibtex_str)
# Write back to BibTeX format
bibtex_output = bibtexparser.dumps(bib_database)
print(bibtex_output)
# Write to file
with open('output.bib', 'w') as bibtex_file:
bibtexparser.dump(bib_database, bibtex_file)

bibtexparser uses a three-layer architecture:
The library supports string interpolation, cross-reference resolution, and extensive customization through parsing hooks and field processing functions.
Simple interface for parsing BibTeX strings and files into BibDatabase objects, and writing them back to BibTeX format. These functions handle the most common use cases with sensible defaults.
def loads(bibtex_str: str, parser=None) -> BibDatabase: ...
def load(bibtex_file, parser=None) -> BibDatabase: ...
def dumps(bib_database: BibDatabase, writer=None) -> str: ...
def dump(bib_database: BibDatabase, bibtex_file, writer=None) -> None: ...

Configurable parser with options for handling non-standard entries, field homogenization, string interpolation, and cross-reference resolution. Includes customization hooks for processing entries during parsing.
class BibTexParser:
def __init__(
self,
customization=None,
ignore_nonstandard_types: bool = True,
homogenize_fields: bool = False,
interpolate_strings: bool = True,
common_strings: bool = True,
add_missing_from_crossref: bool = False
): ...
def parse(self, bibtex_str: str, partial: bool = False) -> BibDatabase: ...
def parse_file(self, file, partial: bool = False) -> BibDatabase: ...

Configurable writer with extensive formatting options, including field ordering, indentation, alignment, and entry sorting. Supports various BibTeX syntax styles and output customization.
class BibTexWriter:
def __init__(self, write_common_strings: bool = False): ...
def write(self, bib_database: BibDatabase) -> str: ...
from enum import Enum
class SortingStrategy(Enum):
ALPHABETICAL_ASC = auto()
ALPHABETICAL_DESC = auto()
PRESERVE = auto()

Core data structures for representing bibliographic databases, including entries, comments, preambles, and string definitions. Supports cross-reference resolution and string expansion.
class BibDatabase:
entries: list
comments: list
strings: dict
preambles: list
def load_common_strings(self) -> None: ...
def get_entry_dict(self) -> dict: ...
def expand_string(self, name: str) -> str: ...
def add_missing_from_crossref(self) -> None: ...

Collection of functions for customizing and processing bibliographic entries, including name parsing, field normalization, LaTeX encoding conversion, and specialized field handling.
def author(record: dict) -> dict: ...
def editor(record: dict) -> dict: ...
def journal(record: dict) -> dict: ...
def keyword(record: dict, sep: str = ',|;') -> dict: ...
def convert_to_unicode(record: dict) -> dict: ...
def homogenize_latex_encoding(record: dict) -> dict: ...

Utilities for converting between LaTeX-encoded text and Unicode, supporting a comprehensive range of special characters, accents, and symbols commonly found in bibliographic data.
def latex_to_unicode(string: str) -> str: ...
def string_to_latex(string: str) -> str: ...
def protect_uppercase(string: str) -> str: ...

class BibDatabase:
"""Main bibliographic database container."""
entries: list # List of entry dictionaries
comments: list # List of comment strings
strings: dict # Dictionary of string definitions
preambles: list # List of preamble strings
class BibDataString:
"""Represents a BibTeX string definition."""
def __init__(self, bibdatabase: BibDatabase, name: str): ...
def get_value(self) -> str: ...
class BibDataStringExpression:
"""Represents BibTeX string expressions (concatenated strings)."""
def __init__(self, expression: list): ...
def get_value(self) -> str: ...
class UndefinedString(KeyError):
"""Exception raised when referencing undefined string."""
pass
class InvalidName(ValueError):
"""Exception raised for invalid name format."""
pass