Python Gherkin parser that converts Gherkin feature files into structured data for behavior-driven development testing frameworks
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Multi-language keyword support for international BDD development with dynamic dialect loading. The Gherkin parser supports over 70 natural languages, allowing teams to write specifications in their native language while maintaining compatibility with testing frameworks.
Language-specific keyword sets with property-based access to translated Gherkin keywords.
class Dialect:
    """Keyword set for a single Gherkin language.

    This is an interface listing: every method body consists of its
    docstring only. Annotations that name ``DialectSpec`` and ``Self`` are
    quoted because neither name is bound at the point where this class is
    declared.
    """

    @classmethod
    def for_name(cls, name: str) -> "Self | None":
        """
        Load dialect by language code.

        Parameters:
        - name: Language code (e.g., 'en', 'fr', 'de', 'es')

        Returns:
        - Dialect: Language dialect instance or None if not found
        """

    def __init__(self, spec: "DialectSpec") -> None:
        """
        Create dialect from specification.

        Parameters:
        - spec: Dialect specification with keyword translations
        """

    @property
    def feature_keywords(self) -> list[str]:
        """Feature keywords of this dialect (examples across dialects: 'Feature', 'Fonctionnalité')"""

    @property
    def rule_keywords(self) -> list[str]:
        """Rule keywords of this dialect (examples across dialects: 'Rule', 'Règle')"""

    @property
    def scenario_keywords(self) -> list[str]:
        """Scenario keywords of this dialect (examples across dialects: 'Scenario', 'Scénario')"""

    @property
    def scenario_outline_keywords(self) -> list[str]:
        """Scenario Outline keywords of this dialect (examples across dialects: 'Scenario Outline', 'Plan du scénario')"""

    @property
    def background_keywords(self) -> list[str]:
        """Background keywords of this dialect (examples across dialects: 'Background', 'Contexte')"""

    @property
    def examples_keywords(self) -> list[str]:
        """Examples keywords of this dialect (examples across dialects: 'Examples', 'Exemples')"""

    @property
    def given_keywords(self) -> list[str]:
        """Given step keywords of this dialect (examples across dialects: 'Given', 'Soit')"""

    @property
    def when_keywords(self) -> list[str]:
        """When step keywords of this dialect (examples across dialects: 'When', 'Quand')"""

    @property
    def then_keywords(self) -> list[str]:
        """Then step keywords of this dialect (examples across dialects: 'Then', 'Alors')"""

    @property
    def and_keywords(self) -> list[str]:
        """And conjunction keywords of this dialect (examples across dialects: 'And', 'Et')"""

    @property
    def but_keywords(self) -> list[str]:
        """But conjunction keywords of this dialect (examples across dialects: 'But', 'Mais')"""

    # The specification this dialect was created from.
    spec: "DialectSpec"
"""Raw dialect specification"""
Structured definition of language-specific keywords for all Gherkin constructs.
# Keyword translations for every Gherkin construct in one language.
#
# ``and`` is a reserved word in Python, so the class-based TypedDict syntax
# (``and: list[str]``) cannot be parsed. The functional form accepts any
# string as a key and declares the same eleven required fields.
DialectSpec = TypedDict(
    "DialectSpec",
    {
        # And conjunction keywords
        "and": list[str],
        # Background section keywords
        "background": list[str],
        # But conjunction keywords
        "but": list[str],
        # Examples table keywords
        "examples": list[str],
        # Feature definition keywords
        "feature": list[str],
        # Given step keywords
        "given": list[str],
        # Rule section keywords
        "rule": list[str],
        # Scenario keywords
        "scenario": list[str],
        # Scenario Outline keywords
        "scenarioOutline": list[str],
        # Then step keywords
        "then": list[str],
        # When step keywords
        "when": list[str],
    },
)
DIALECTS: dict[str, DialectSpec]
"""Global registry of all available language dialects"""
from gherkin.dialect import Dialect, DIALECTS
# List available languages
print(f"Supported languages: {list(DIALECTS.keys())}")
# Load English dialect (default)
english = Dialect.for_name("en")
print(f"Feature keywords: {english.feature_keywords}")
print(f"Given keywords: {english.given_keywords}")
# Load French dialect
french = Dialect.for_name("fr")
print(f"Feature keywords: {french.feature_keywords}")
print(f"Given keywords: {french.given_keywords}")
# Handle unknown dialect
unknown = Dialect.for_name("xyz")
if unknown is None:
print("Dialect not found")
from gherkin import Parser
from gherkin.token_matcher import TokenMatcher
# French Gherkin content
french_gherkin = """
Fonctionnalité: Connexion utilisateur
Scénario: Connexion valide
Soit un utilisateur existant
Quand il saisit des identifiants valides
Alors il devrait être connecté
"""
# Create French token matcher
french_matcher = TokenMatcher("fr")
parser = Parser()
# Parse French content
document = parser.parse(french_gherkin, french_matcher)
feature = document['feature']
print(f"Feature name: {feature['name']}")
print(f"Language: {feature['language']}")
# German example
german_gherkin = """
Funktionalität: Benutzeranmeldung
Szenario: Gültige Anmeldung
Gegeben sei ein existierender Benutzer
Wenn er gültige Anmeldedaten eingibt
Dann sollte er angemeldet sein
"""
german_matcher = TokenMatcher("de")
german_document = parser.parse(german_gherkin, german_matcher)


def detect_language_from_keywords(gherkin_text: str) -> str | None:
    """Guess the dialect code of a Gherkin document.

    Lines are examined from the top:

    1. Blank lines are skipped.
    2. A ``# language: <code>`` comment is honoured when ``<code>`` is a key
       of ``DIALECTS``; any other comment line is skipped.
    3. Tag lines (starting with ``@``) are skipped.
    4. The first remaining line is compared against ``<keyword>:`` for every
       feature keyword of every dialect in ``DIALECTS``.

    Parameters:
    - gherkin_text: Raw Gherkin source text

    Returns:
    - str | None: Language code, or None when no header or feature keyword
      matches

    Note: when several dialects share a feature keyword, the one that comes
    first in ``DIALECTS`` iteration order is returned.
    """
    import re

    for raw_line in gherkin_text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith('#'):
            header = re.fullmatch(r"#\s*language\s*:\s*([a-zA-Z\-_]+)", line)
            if header and header.group(1) in DIALECTS:
                return header.group(1)
            continue
        if line.startswith('@'):
            continue
        # First significant line: it either opens the feature or nothing does.
        for lang_code, dialect_spec in DIALECTS.items():
            prefixes = tuple(keyword + ':' for keyword in dialect_spec['feature'])
            if line.startswith(prefixes):
                return lang_code
        return None
    return None
# Auto-detect language
french_text = "Fonctionnalité: Test automatique"
detected = detect_language_from_keywords(french_text)
print(f"Detected language: {detected}") # Output: fr
spanish_text = "Característica: Prueba automática"
detected = detect_language_from_keywords(spanish_text)
print(f"Detected language: {detected}") # Output: es
from gherkin.stream.gherkin_events import GherkinEvents
def process_multilingual_features(features: dict[str, str]) -> None:
    """Run each feature text through GherkinEvents and print a summary line.

    For every parsed document the feature name is printed; for every parse
    error the error message is printed.

    Parameters:
    - features: Mapping of file name (used as the source URI) to Gherkin text
    """
    options = GherkinEvents.Options(
        print_source=False,
        print_ast=True,
        print_pickles=True,
    )
    processor = GherkinEvents(options)
    for file_name, content in features.items():
        # Only used to label the output line; defaults to "en" when the
        # keyword lookup finds nothing.
        language = detect_language_from_keywords(content) or "en"
        source_event = {
            "source": {
                "uri": file_name,
                "location": {"line": 1},
                "data": content,
                "mediaType": "text/x.cucumber.gherkin+plain",
            }
        }
        for envelope in processor.enum(source_event):
            if "gherkinDocument" in envelope:
                # A document without a feature (e.g. empty text) presumably
                # carries no "feature" key, so do not index it blindly.
                feature = envelope["gherkinDocument"].get("feature")
                if feature is None:
                    print(f"{file_name} ({language}): no feature found")
                    continue
                print(f"{file_name} ({language}): {feature['name']}")
            elif "parseError" in envelope:
                error = envelope["parseError"]
                print(f"Error in {file_name}: {error['message']}")
# Process mixed language features
multilingual_features = {
"login_en.feature": """
Feature: User Login
Scenario: Valid login
Given a user exists
When they enter credentials
Then they are logged in
""",
"login_fr.feature": """
Fonctionnalité: Connexion utilisateur
Scénario: Connexion valide
Soit un utilisateur existant
Quand il saisit des identifiants
Alors il devrait être connecté
""",
"login_es.feature": """
Característica: Inicio de sesión
Escenario: Inicio válido
Dado que existe un usuario
Cuando ingresa credenciales
Entonces debería estar conectado
"""
}
process_multilingual_features(multilingual_features)
from gherkin.dialect import DialectSpec
# Create custom dialect (hypothetical Pirate English)
pirate_spec: DialectSpec = {
"feature": ["Treasure Map"],
"scenario": ["Adventure", "Quest"],
"given": ["Ahoy", "Avast"],
"when": ["When ye"],
"then": ["Then ye shall"],
"and": ["An'", "And"],
"but": ["But ye"],
"background": ["Ship's Log"],
"examples": ["Tales"],
"rule": ["Pirate Code"],
"scenarioOutline": ["Legend"]
}
# Create dialect instance
pirate_dialect = Dialect(pirate_spec)
print(f"Feature keywords: {pirate_dialect.feature_keywords}")
print(f"Given keywords: {pirate_dialect.given_keywords}")
from gherkin.errors import ParserException
def create_localized_error_handler(language: str):
    """Create a parse-error formatter that lists the keywords of a dialect.

    Parameters:
    - language: Language code of the dialect to describe; when
      Dialect.for_name() returns None for it, English ("en") is used

    Returns:
    - A function that takes a ParserException and returns the formatted
      message as a string
    """
    dialect = Dialect.for_name(language)
    effective_language = language
    if dialect is None:
        dialect = Dialect.for_name("en")  # Fallback to English
        # Report the dialect whose keywords are actually listed below.
        effective_language = "en"

    # Built once: the keyword lists do not depend on the error being formatted.
    expected_keywords = {
        'Feature': dialect.feature_keywords,
        'Scenario': dialect.scenario_keywords,
        'Given': dialect.given_keywords,
        'When': dialect.when_keywords,
        'Then': dialect.then_keywords,
    }

    def handle_parse_error(error: ParserException) -> str:
        """Format error with language context"""
        location = error.location
        message = str(error)
        lines = [
            f"Parse error at line {location['line']}: {message}",
            f"Expected keywords in {effective_language}:",
        ]
        lines.extend(
            f" {keyword_type}: {', '.join(keywords)}"
            for keyword_type, keywords in expected_keywords.items()
        )
        return "\n".join(lines)

    return handle_parse_error
# Use with different languages
french_error_handler = create_localized_error_handler("fr")
german_error_handler = create_localized_error_handler("de")
Common language dialects with their feature keywords:
| Language | Code | Feature Keywords | Example |
|---|---|---|---|
| English | en | Feature | Feature: User login |
| French | fr | Fonctionnalité | Fonctionnalité: Connexion |
| German | de | Funktionalität | Funktionalität: Benutzer |
| Spanish | es | Característica | Característica: Usuario |
| Italian | it | Funzionalità | Funzionalità: Utente |
| Portuguese | pt | Funcionalidade | Funcionalidade: Usuário |
| Russian | ru | Функция | Функция: Пользователь |
| Chinese | zh-CN | 功能 | 功能: 用户登录 |
| Japanese | ja | フィーチャ | フィーチャ: ユーザーログイン |
| Korean | ko | 기능 | 기능: 사용자 로그인 |
Access the complete list programmatically:
from gherkin.dialect import DIALECTS
for lang_code in sorted(DIALECTS.keys()):
dialect_spec = DIALECTS[lang_code]
feature_keywords = dialect_spec['feature']
print(f"{lang_code}: {', '.join(feature_keywords)}")
Install with Tessl CLI
npx tessl i tessl/pypi-gherkin-official