Python Gherkin parser that converts Gherkin feature files into structured data for behavior-driven development testing frameworks
—
Quality: Pending — Does it follow best practices?
Impact: Pending — No eval scenarios have been run.
Primary parsing functionality that converts Gherkin text into structured Abstract Syntax Tree (AST) format. The parser handles tokenization, syntax analysis, and error recovery while supporting multiple input formats and comprehensive error reporting.
Main parser class that transforms Gherkin text into structured AST with configurable error handling and AST building.
class Parser:
def __init__(self, ast_builder: AstBuilder | None = None) -> None:
"""
Create a new parser instance.
Parameters:
- ast_builder: Optional custom AST builder, defaults to AstBuilder()
"""
def parse(
self,
token_scanner_or_str: TokenScanner | str,
token_matcher: TokenMatcher | None = None,
) -> GherkinDocument:
"""
Parse Gherkin text or token stream into AST.
Parameters:
- token_scanner_or_str: Either raw Gherkin text string or TokenScanner instance
- token_matcher: Optional token matcher, defaults to TokenMatcher()
Returns:
- GherkinDocument: Parsed AST representation
Raises:
- CompositeParserException: Multiple parsing errors occurred
- ParserException: Single parsing error occurred
"""
stop_at_first_error: bool
"""Whether to stop parsing at the first error or collect all errors"""Builds Abstract Syntax Tree nodes during parsing with ID generation and comment tracking.
class AstBuilder:
def __init__(self, id_generator: IdGenerator | None = None) -> None:
"""
Create AST builder with optional ID generator.
Parameters:
- id_generator: Optional ID generator, defaults to IdGenerator()
"""
def reset(self) -> None:
"""Reset builder state for new parsing session"""
def start_rule(self, rule_type: str) -> None:
"""Start processing a grammar rule"""
def end_rule(self, rule_type: str) -> None:
"""End processing a grammar rule"""
def build(self, token: Token) -> None:
"""Build AST node from token"""
def get_result(self) -> Any:
"""Get final parsed result"""
id_generator: IdGenerator
stack: list[AstNode]
comments: list[Comment]

Low-level tokenization and scanning functionality for lexical analysis.
class TokenScanner:
def __init__(self, source: str) -> None:
"""
Create token scanner for Gherkin source text.
Parameters:
- source: Raw Gherkin text to tokenize
"""
def read(self) -> Token:
"""Read next token from source"""
class TokenMatcher:
def __init__(self, dialect_name: str = "en") -> None:
"""
Create token matcher for specified language dialect.
Parameters:
- dialect_name: Language dialect code (default: "en")
"""
def reset(self) -> None:
"""Reset matcher state"""
def match_FeatureLine(self, token: Token) -> bool:
"""Match feature line tokens"""
def match_ScenarioLine(self, token: Token) -> bool:
"""Match scenario line tokens"""
def match_StepLine(self, token: Token) -> bool:
"""Match step line tokens"""
class GherkinInMarkdownTokenMatcher(TokenMatcher):
"""Token matcher for Gherkin embedded in Markdown documents"""from gherkin import Parser
parser = Parser()
gherkin_text = """
Feature: Calculator
Scenario: Addition
Given I have 2 and 3
When I add them
Then I get 5
"""
document = parser.parse(gherkin_text)
feature = document['feature']
print(f"Feature: {feature['name']}")
print(f"Scenarios: {len(feature['children'])}")

from gherkin import Parser
from gherkin.ast_builder import AstBuilder
from gherkin.stream.id_generator import IdGenerator
# Create custom ID generator
id_gen = IdGenerator()
ast_builder = AstBuilder(id_gen)
parser = Parser(ast_builder)
document = parser.parse(gherkin_text)

from gherkin import Parser
from gherkin.errors import CompositeParserException, ParserException
parser = Parser()
invalid_gherkin = """
Feature: Invalid
Scenario:
Given step without scenario name
"""
try:
document = parser.parse(invalid_gherkin)
except CompositeParserException as e:
print(f"Multiple errors: {len(e.errors)}")
for error in e.errors:
print(f" Line {error.location['line']}: {error}")
except ParserException as e:
print(f"Parse error at line {e.location['line']}: {e}")

from gherkin import Parser
from gherkin.token_scanner import TokenScanner
from gherkin.token_matcher import TokenMatcher
# Manual token processing
scanner = TokenScanner(gherkin_text)
matcher = TokenMatcher("en") # English dialect
parser = Parser()
document = parser.parse(scanner, matcher)

Install with Tessl CLI
npx tessl i tessl/pypi-gherkin-official