An MkDocs plugin that enables managing citations with BibTeX
—
Citation parsing and processing: this module extracts and parses citation syntax from Markdown content. It handles citation blocks and inline references, and provides data structures for representing citations.
Represents a single citation in raw markdown format without any formatting applied.
@dataclass
class Citation:
"""Represents a citation in raw markdown without formatting"""
key: str # The citation key (without @ symbol)
prefix: str = "" # Text before the citation key
suffix: str = "" # Text after the citation key
def __str__(self) -> str:
"""
String representation of the citation.
Returns:
str: Formatted citation string with prefix, @key, and suffix
"""
@classmethod
def from_markdown(cls, markdown: str) -> list["Citation"]:
"""
Extracts citations from a markdown string.
Args:
markdown (str): Markdown text containing citations
Returns:
list[Citation]: List of parsed Citation objects
Note:
Filters out email addresses to avoid false matches
"""
Represents a block of citations enclosed in square brackets, which may contain multiple citations separated by semicolons.
@dataclass
class CitationBlock:
"""Represents a block of citations in square brackets"""
citations: list[Citation] # List of citations in this block
raw: str = "" # Raw markdown text of the block
def __str__(self) -> str:
"""
String representation of the citation block.
Returns:
str: Formatted citation block with square brackets
"""
@classmethod
def from_markdown(cls, markdown: str) -> list["CitationBlock"]:
"""
Extracts citation blocks from a markdown string.
Process:
1. Find all square bracket blocks
2. For each block, try to extract citations
3. If successful, create CitationBlock object
4. Skip blocks that don't contain valid citations
Args:
markdown (str): Markdown text containing citation blocks
Returns:
list[CitationBlock]: List of parsed CitationBlock objects
"""
Represents an inline citation reference that appears directly in text without square brackets.
@dataclass
class InlineReference:
"""Represents an inline citation reference"""
key: str # The citation key (without @ symbol)
def __str__(self) -> str:
"""
String representation of the inline reference.
Returns:
str: Formatted as @key
"""
def __hash__(self) -> int:
"""
Hash implementation for use in sets.
Returns:
int: Hash based on citation key
"""
@classmethod
def from_markdown(cls, markdown: str) -> list["InlineReference"]:
"""
Finds inline references in the markdown text.
Note:
Only use this after processing all regular citations to avoid conflicts
Args:
markdown (str): Markdown text containing inline references
Returns:
list[InlineReference]: List of parsed InlineReference objects
"""
Pre-compiled regular expression patterns used for citation parsing.
CITATION_REGEX: re.Pattern[str]
"""Pattern for matching individual citations with optional prefix/suffix"""
CITATION_BLOCK_REGEX: re.Pattern[str]
"""Pattern for matching citation blocks in square brackets"""
EMAIL_REGEX: re.Pattern[str]
"""Pattern for matching email addresses to avoid false citation matches"""
INLINE_REFERENCE_REGEX: re.Pattern[str]
"""Pattern for matching inline references outside of citation blocks"""
from mkdocs_bibtex.citation import CitationBlock
# Citation block with multiple citations
markdown = "This references [@smith2020; @jones2019, pp. 100-120]."
citation_blocks = CitationBlock.from_markdown(markdown)
for block in citation_blocks:
print(f"Block: {block}")
for citation in block.citations:
print(f" Key: {citation.key}")
print(f" Prefix: '{citation.prefix}'")
print(f" Suffix: '{citation.suffix}'")
# Output:
# Block: [@smith2020; @jones2019, pp. 100-120]
# Key: smith2020
# Prefix: ''
# Suffix: ''
# Key: jones2019
# Prefix: ''
# Suffix: 'pp. 100-120'
from mkdocs_bibtex.citation import Citation
# Citation with prefix and suffix
citation_text = "see @author2021, pp. 25-30"
citations = Citation.from_markdown(citation_text)
for citation in citations:
print(f"Key: {citation.key}")
print(f"Prefix: '{citation.prefix}'")
print(f"Suffix: '{citation.suffix}'")
# Output:
# Key: author2021
# Prefix: 'see'
# Suffix: 'pp. 25-30'
from mkdocs_bibtex.citation import InlineReference
# Text with inline citations
markdown = "According to @smith2020, the results show @jones2019 was correct."
inline_refs = InlineReference.from_markdown(markdown)
for ref in inline_refs:
print(f"Inline reference: {ref}")
# Output:
# Inline reference: @smith2020
# Inline reference: @jones2019
from mkdocs_bibtex.citation import CitationBlock, InlineReference
markdown_content = r'''
# My Document
This cites [@primary2020; see @secondary2019, pp. 100].
The method from @author2021 shows interesting results.
\bibliography
'''
# Step 1: Process citation blocks first
citation_blocks = CitationBlock.from_markdown(markdown_content)
print(f"Found {len(citation_blocks)} citation blocks")
# Step 2: Process inline references (after blocks to avoid conflicts)
inline_refs = InlineReference.from_markdown(markdown_content)
print(f"Found {len(inline_refs)} inline references")
# Step 3: Extract all unique keys
all_keys = set()
for block in citation_blocks:
for citation in block.citations:
all_keys.add(citation.key)
for ref in inline_refs:
all_keys.add(ref.key)
print(f"Total unique citations: {all_keys}")
# Output: {'primary2020', 'secondary2019', 'author2021'}
Citation blocks are enclosed in square brackets and can contain multiple citations:
[@single_citation]
[@first; @second]
[@author2020, pp. 100-120]
[see @author2020, pp. 100; @other2019]
Inline citations appear directly in text:
According to @author2020, the method works.
The @author2020 approach is effective.
Results from @study2019 confirm this.
[See @primary2020, pp. 25-30; cf. @secondary2019; @tertiary2018, ch. 5]
[@author2020, Figure 3; @coauthor2020, Table 2]
[e.g., @example2019; @another2020; but see @contrary2018]
The citation parsing system includes robust error handling:
Install with Tessl CLI
npx tessl i tessl/pypi-mkdocs-bibtex