Python Gherkin parser that converts Gherkin feature files into structured data for behavior-driven development testing frameworks
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
High-level API for processing multiple Gherkin sources with configurable output formats, comprehensive error handling, and support for both classic .feature files and Gherkin-in-Markdown format. Provides event-driven processing with structured output.
Main stream processor that handles source events and generates structured output with configurable content inclusion.
class GherkinEvents:
    """Stream processor that turns source events into output envelopes."""

    @dataclass
    class Options:
        # Flags selecting which envelope kinds are included in the output.
        print_source: bool
        """Include source content in output"""
        print_ast: bool
        """Include parsed AST in output"""
        print_pickles: bool
        """Include compiled pickles in output"""

    def __init__(self, options: Options) -> None:
        """
        Create stream processor with output options.

        Parameters:
        - options: Configuration for output content
        """

    def enum(
        self, source_event: Event
    ) -> Generator[Event | Error | GherkinDocumentEnvelope | PickleEnvelope]:
        """
        Process source event and generate output envelopes.

        Parameters:
        - source_event: Input source event with Gherkin content

        Yields:
        - Event: Source event (if print_source enabled)
        - GherkinDocumentEnvelope: Parsed AST (if print_ast enabled)
        - PickleEnvelope: Compiled pickles (if print_pickles enabled)
        - Error: Parse errors with location information
        """

    # Collaborators held by the processor (declared here without values).
    id_generator: IdGenerator
    parser: Parser
    compiler: Compiler

Structured event types for stream processing input and output.
class Event(TypedDict):
    # Envelope wrapping one Gherkin source; passed to the stream processor
    # as input and yielded back when source output is enabled.
    source: Source
    """Source information with URI and content"""
class Source(TypedDict):
    # Describes a single Gherkin source: where it came from, its raw text,
    # and which dialect (plain or Markdown) the text is written in.
    uri: str
    """Source file URI or identifier"""
    location: Location
    """Source location information"""
    data: str
    """Raw Gherkin source content"""
    mediaType: str
    """Media type: text/x.cucumber.gherkin+plain or +markdown"""
class GherkinDocumentEnvelope(TypedDict):
    # Output envelope carrying the parsed document (AST) for one source.
    gherkinDocument: GherkinDocumentWithURI
    """Parsed Gherkin document with URI"""
class PickleEnvelope(TypedDict):
    # Output envelope carrying one compiled pickle.
    pickle: Pickle
    """Executable test scenario"""
class Error(TypedDict):
    # Output envelope reporting a parse failure.
    parseError: ParseError
    """Parse error with source and location"""
class ParseError(TypedDict):
    # Details of one parse failure: the offending source plus a message.
    source: Source
    """Source that caused the error"""
    message: str
    """Error description"""

Utility for generating unique identifiers across the processing pipeline.
class IdGenerator:
    # Hands out sequential string identifiers.

    def __init__(self) -> None:
        """Create ID generator starting from 0"""

    def get_next_id(self) -> str:
        """
        Generate next unique ID.

        Returns:
        - str: Next sequential ID as string
        """


from gherkin.stream.gherkin_events import GherkinEvents
from gherkin.stream.source_events import Event
# Request every envelope kind the processor can emit.
options = GherkinEvents.Options(print_source=True, print_ast=True, print_pickles=True)

# One processor instance handles the source event below.
processor = GherkinEvents(options)

# A single in-memory feature wrapped as a source event.
source_event: Event = {
    "source": {
        "uri": "features/login.feature",
        "location": {"line": 1},
        "data": """
Feature: User Login
Scenario: Valid credentials
Given a user exists
When they enter valid credentials
Then they should be logged in
""",
        "mediaType": "text/x.cucumber.gherkin+plain",
    }
}

# Dispatch on the key that identifies the envelope kind; the first match wins.
for envelope in processor.enum(source_event):
    if "source" in envelope:
        print(f"Source: {envelope['source']['uri']}")
        continue
    if "gherkinDocument" in envelope:
        doc = envelope["gherkinDocument"]
        print(f"Feature: {doc['feature']['name']}")
        continue
    if "pickle" in envelope:
        pickle = envelope["pickle"]
        print(f"Scenario: {pickle['name']}")
        continue
    if "parseError" in envelope:
        error = envelope["parseError"]
        print(f"Error: {error['message']}")
# Only generate pickles for test execution
# Source and AST output are switched off; only pickles are requested.
execution_options = GherkinEvents.Options(
    print_source=False,
    print_ast=False,
    print_pickles=True,
)
processor = GherkinEvents(execution_options)

for envelope in processor.enum(source_event):
    # Anything that is not a pickle envelope is ignored.
    if "pickle" not in envelope:
        continue
    pickle = envelope["pickle"]
    # Hand the compiled scenario to the test runner.
    run_test_scenario(pickle)
# Process with error handling
# Keep source and AST output for debugging; pickles are not needed here.
debug_options = GherkinEvents.Options(print_source=True, print_ast=True, print_pickles=False)
processor = GherkinEvents(debug_options)

# NOTE(review): an unnamed `Scenario:` looks like valid Gherkin, so this input
# may not actually produce any parse errors — confirm against the parser.
invalid_source: Event = {
    "source": {
        "uri": "broken.feature",
        "location": {"line": 1},
        "data": """
Feature: Broken
Scenario:
Given step without scenario name
""",
        "mediaType": "text/x.cucumber.gherkin+plain",
    }
}

# Collect every reported parse error, echoing each one as it arrives.
errors = []
for envelope in processor.enum(invalid_source):
    if "parseError" not in envelope:
        continue
    error = envelope["parseError"]
    errors.append(error)
    print(f"Parse error in {error['source']['uri']}: {error['message']}")
print(f"Total errors: {len(errors)}")

# Gherkin embedded in a Markdown document, flagged by the "+markdown" media type.
# The fenced block inside the data string is closed explicitly so the embedded
# Markdown is well-formed.
# NOTE(review): Gherkin-in-Markdown usually spells keywords as headings and
# bullets (`# Feature:`, `## Scenario:`, `* Given`); confirm that this fenced
# form actually yields pickles.
markdown_source: Event = {
    "source": {
        "uri": "docs/feature.md",
        "location": {"line": 1},
        "data": """
# User Authentication
This document describes login functionality.
```gherkin
Feature: User Login
Scenario: Valid login
Given a user exists
When they enter credentials
Then they are logged in
```
""",
        "mediaType": "text/x.cucumber.gherkin+markdown",
    }
}

# Only compiled pickles are requested for the Markdown source.
options = GherkinEvents.Options(
    print_source=False,
    print_ast=False,
    print_pickles=True,
)
processor = GherkinEvents(options)

for envelope in processor.enum(markdown_source):
    if "pickle" in envelope:
        pickle = envelope["pickle"]
        print(f"Extracted scenario: {pickle['name']}")
### Batch Processing
```python
def process_multiple_files(file_paths: list[str]) -> None:
    """Process multiple Gherkin files and print a summary line for each.

    Every file is read, wrapped in a source event and run through a shared
    ``GherkinEvents`` processor configured to emit pickles only. Pickle and
    parse-error envelopes are counted per file.

    Parameters:
    - file_paths: Paths of the files to process; a path ending in ``.md`` is
      submitted with the Markdown media type, anything else as plain Gherkin.

    Raises:
    - OSError: If a file cannot be opened or read.
    - UnicodeDecodeError: If a file is not valid UTF-8.
    """
    options = GherkinEvents.Options(
        print_source=False,
        print_ast=False,
        print_pickles=True,
    )
    processor = GherkinEvents(options)

    for file_path in file_paths:
        # Decode explicitly as UTF-8 rather than relying on the platform
        # default, which differs between operating systems and locales.
        with open(file_path, encoding="utf-8") as f:
            content = f.read()

        # Determine media type from extension
        media_type = (
            "text/x.cucumber.gherkin+markdown"
            if file_path.endswith(".md")
            else "text/x.cucumber.gherkin+plain"
        )
        source_event: Event = {
            "source": {
                "uri": file_path,
                "location": {"line": 1},
                "data": content,
                "mediaType": media_type,
            }
        }

        # Counters are reset per file so each summary line stands alone.
        pickles = []
        errors = []
        for envelope in processor.enum(source_event):
            if "pickle" in envelope:
                pickles.append(envelope["pickle"])
            elif "parseError" in envelope:
                errors.append(envelope["parseError"])

        print(f"{file_path}: {len(pickles)} scenarios, {len(errors)} errors")
# Run the batch helper over two feature files and one Markdown spec.
process_multiple_files(
    [
        "features/login.feature",
        "features/registration.feature",
        "docs/api-spec.md",
    ]
)


def create_test_suite(source_events: list[Event]) -> dict:
    """Collect everything the processor emits for the given sources.

    Parameters:
    - source_events: Source events to run through a single processor.

    Returns:
    - dict: Lists under the keys "sources", "features", "scenarios" and
      "errors", filled in the order the envelopes were produced.
    """
    processor = GherkinEvents(
        GherkinEvents.Options(print_source=True, print_ast=True, print_pickles=True)
    )

    collected_sources = []
    collected_features = []
    collected_scenarios = []
    collected_errors = []

    for event in source_events:
        for envelope in processor.enum(event):
            if "source" in envelope:
                collected_sources.append(envelope["source"])
            elif "gherkinDocument" in envelope:
                document = envelope["gherkinDocument"]
                # A document without a (truthy) feature contributes nothing.
                if document.get("feature"):
                    collected_features.append(document["feature"])
            elif "pickle" in envelope:
                collected_scenarios.append(envelope["pickle"])
            elif "parseError" in envelope:
                collected_errors.append(envelope["parseError"])

    return {
        "sources": collected_sources,
        "features": collected_features,
        "scenarios": collected_scenarios,
        "errors": collected_errors,
    }
# Build comprehensive test suite
sources = [
    # ... create source events for all files
]
suite = create_test_suite(sources)
# Summarise the collected features, scenarios and parse errors in one line.
print(f"Test suite: {len(suite['features'])} features, "
      f"{len(suite['scenarios'])} scenarios, "
      f"{len(suite['errors'])} errors")

Install with Tessl CLI
npx tessl i tessl/pypi-gherkin-official