Comprehensive Chinese character to Pinyin conversion library with intelligent word segmentation and multiple output styles
—
Comprehensive style system controlling pinyin output format including tones, initials/finals separation, alternative notation systems, and specialized romanization schemes.
The Style enum provides all available output format options for pinyin conversion.
class Style(IntEnum):
"""Enumeration of all available pinyin output styles."""
NORMAL = 0 # Normal pinyin without tones
TONE = 1 # Standard tone marks (default for pinyin(); lazy_pinyin() defaults to NORMAL)
TONE2 = 2 # Tone numbers after vowels
INITIALS = 3 # Initial consonants only
FIRST_LETTER = 4 # First letter only
FINALS = 5 # Final vowels/consonants without tones
FINALS_TONE = 6 # Finals with standard tone marks
FINALS_TONE2 = 7 # Finals with tone numbers after vowels
TONE3 = 8 # Tone numbers after complete pinyin
FINALS_TONE3 = 9 # Finals with tone numbers after finals
BOPOMOFO = 10 # Bopomofo (Zhuyin) notation
BOPOMOFO_FIRST = 11 # First Bopomofo character
CYRILLIC = 12 # Cyrillic transliteration
CYRILLIC_FIRST = 13 # First Cyrillic character
WADEGILES = 14 # Wade-Giles romanization
GWOYEU = 15 # Gwoyeu Romatzyh (National Romanization)
BRAILLE_MAINLAND = 16 # Mainland Chinese Braille
BRAILLE_MAINLAND_TONE = 17 # Mainland Chinese Braille with tones

Core pinyin styles with different tone representation methods.
STYLE_NORMAL = 0
NORMAL = 0

Plain pinyin without tone markers.
from pypinyin import lazy_pinyin, Style, NORMAL
result = lazy_pinyin('中国', style=Style.NORMAL)
print(result) # ['zhong', 'guo']
result = lazy_pinyin('中国', style=NORMAL)
print(result) # ['zhong', 'guo']

STYLE_TONE = 1
TONE = 1

Standard pinyin with diacritical tone marks (default style).
from pypinyin import lazy_pinyin, Style, TONE
result = lazy_pinyin('中国', style=Style.TONE)
print(result) # ['zhōng', 'guó']
result = lazy_pinyin('中国', style=TONE)
print(result) # ['zhōng', 'guó']

STYLE_TONE2 = 2
TONE2 = 2

Tone numbers placed after the vowel carrying the tone.
from pypinyin import lazy_pinyin, Style, TONE2
result = lazy_pinyin('中国', style=Style.TONE2)
print(result) # ['zho1ng', 'guo2']
result = lazy_pinyin('中国', style=TONE2)
print(result) # ['zho1ng', 'guo2']

STYLE_TONE3 = 8
TONE3 = 8

Tone numbers appended to complete pinyin syllables.
from pypinyin import lazy_pinyin, Style, TONE3
result = lazy_pinyin('中国', style=Style.TONE3)
print(result) # ['zhong1', 'guo2']
result = lazy_pinyin('中国', style=TONE3)
print(result) # ['zhong1', 'guo2']

Styles that separate or extract specific parts of pinyin syllables.
STYLE_INITIALS = 3
INITIALS = 3

Extract only initial consonants from pinyin syllables.
from pypinyin import lazy_pinyin, Style, INITIALS
result = lazy_pinyin('中华', style=Style.INITIALS)
print(result) # ['zh', 'h']
result = lazy_pinyin('安全', style=INITIALS)
print(result) # ['', 'q'] # 安 has no initial consonant

STYLE_FIRST_LETTER = 4
FIRST_LETTER = 4

Extract only the first letter of each pinyin syllable.
from pypinyin import lazy_pinyin, Style, FIRST_LETTER
result = lazy_pinyin('中华人民共和国', style=Style.FIRST_LETTER)
print(result) # ['z', 'h', 'r', 'm', 'g', 'h', 'g']
result = lazy_pinyin('中华人民共和国', style=FIRST_LETTER)
print(result) # ['z', 'h', 'r', 'm', 'g', 'h', 'g']

STYLE_FINALS = 5
FINALS = 5

Extract final vowels and consonants without tone markers.
from pypinyin import lazy_pinyin, Style, FINALS
result = lazy_pinyin('中华', style=Style.FINALS)
print(result) # ['ong', 'ua']
result = lazy_pinyin('中华', style=FINALS)
print(result) # ['ong', 'ua']

STYLE_FINALS_TONE = 6
FINALS_TONE = 6

Finals with standard diacritical tone marks.
from pypinyin import lazy_pinyin, Style, FINALS_TONE
result = lazy_pinyin('中华', style=Style.FINALS_TONE)
print(result) # ['ōng', 'uá']
result = lazy_pinyin('中华', style=FINALS_TONE)
print(result) # ['ōng', 'uá']

STYLE_FINALS_TONE2 = 7
FINALS_TONE2 = 7
STYLE_FINALS_TONE3 = 9
FINALS_TONE3 = 9

Finals with tone numbers in different positions.
from pypinyin import lazy_pinyin, Style, FINALS_TONE2, FINALS_TONE3
# Tone numbers after vowels
result = lazy_pinyin('中华', style=Style.FINALS_TONE2)
print(result) # ['o1ng', 'ua2']
# Tone numbers after complete finals
result = lazy_pinyin('中华', style=FINALS_TONE3)
print(result) # ['ong1', 'ua2']

Styles providing alternative phonetic notation systems beyond standard pinyin.
STYLE_BOPOMOFO = 10
BOPOMOFO = 10
STYLE_BOPOMOFO_FIRST = 11
BOPOMOFO_FIRST = 11

Traditional Chinese phonetic notation system.
from pypinyin import lazy_pinyin, Style, BOPOMOFO, BOPOMOFO_FIRST
# Full Bopomofo
result = lazy_pinyin('中国', style=Style.BOPOMOFO)
print(result) # ['ㄓㄨㄥ', 'ㄍㄨㄛˊ']
# First Bopomofo character only
result = lazy_pinyin('中国', style=BOPOMOFO_FIRST)
print(result) # ['ㄓ', 'ㄍ']

STYLE_CYRILLIC = 12
CYRILLIC = 12
STYLE_CYRILLIC_FIRST = 13
CYRILLIC_FIRST = 13

Cyrillic-based transliteration system.
from pypinyin import lazy_pinyin, Style, CYRILLIC, CYRILLIC_FIRST
# Full Cyrillic
result = lazy_pinyin('中国', style=Style.CYRILLIC)
print(result) # ['чжун1', 'го2'] # tone numbers follow each syllable
# First Cyrillic character only
result = lazy_pinyin('中国', style=CYRILLIC_FIRST)
print(result) # ['ч', 'г']

STYLE_WADEGILES = 14
WADEGILES = 14

Wade-Giles romanization system, commonly used for traditional Chinese transliteration.
from pypinyin import lazy_pinyin, Style, WADEGILES
# Wade-Giles romanization
result = lazy_pinyin('威妥玛拼音', style=Style.WADEGILES)
print(result) # ['wei', 'tʻo', 'ma', 'pʻin', 'yin']
result = lazy_pinyin('中国', style=WADEGILES)
print(result) # ['chung', 'kuo']

STYLE_GWOYEU = 15
GWOYEU = 15

National Romanization system where tones are indicated by spelling changes.
from pypinyin import lazy_pinyin, Style, GWOYEU
# Gwoyeu Romatzyh
result = lazy_pinyin('中国', style=Style.GWOYEU)
print(result) # ['jong', 'gwo']
result = lazy_pinyin('中华人民共和国', style=GWOYEU)
print(result) # ['jong', 'hwa', 'ren', 'min', 'gonq', 'her', 'gwo']

STYLE_BRAILLE_MAINLAND = 16
BRAILLE_MAINLAND = 16
STYLE_BRAILLE_MAINLAND_TONE = 17
BRAILLE_MAINLAND_TONE = 17

Mainland Chinese Braille notation for accessibility applications.
from pypinyin import lazy_pinyin, Style, BRAILLE_MAINLAND, BRAILLE_MAINLAND_TONE
# Mainland Chinese Braille without tones
result = lazy_pinyin('中国', style=Style.BRAILLE_MAINLAND)
print(result) # ['⠌⠲', '⠛⠕']
# Mainland Chinese Braille with tone marks
result = lazy_pinyin('中国', style=BRAILLE_MAINLAND_TONE)
print(result) # ['⠌⠲⠁', '⠛⠕⠂']
# Longer text example
result = lazy_pinyin('时间不早了', style=Style.BRAILLE_MAINLAND)
print(result) # ['⠱⠊', '⠚⠔', '⠃⠥', '⠵⠦', '⠇⠔']

from pypinyin import lazy_pinyin, Style
text = '北京大学'
# URL slugs - use NORMAL
slug_friendly = lazy_pinyin(text, style=Style.NORMAL)
print('-'.join(slug_friendly)) # 'bei-jing-da-xue'
# Search indexing - use FIRST_LETTER
search_index = lazy_pinyin(text, style=Style.FIRST_LETTER)
print(''.join(search_index)) # 'bjdx'
# Learning materials - use TONE3 (numbers are easier to type)
learning = lazy_pinyin(text, style=Style.TONE3)
print(' '.join(learning)) # 'bei3 jing1 da4 xue2'
# Phonetic analysis - use INITIALS and FINALS_TONE
initials = lazy_pinyin(text, style=Style.INITIALS)
finals = lazy_pinyin(text, style=Style.FINALS_TONE)
print('Initials:', initials) # ['b', 'j', 'd', 'x']
print('Finals:', finals) # ['ěi', 'īng', 'à', 'üé']

Both long and short forms are available for all style constants:
# These are equivalent
from pypinyin import STYLE_TONE, TONE
from pypinyin import STYLE_NORMAL, NORMAL
from pypinyin import STYLE_FIRST_LETTER, FIRST_LETTER
# All produce the same result
result1 = lazy_pinyin('中国', style=Style.TONE)
result2 = lazy_pinyin('中国', style=STYLE_TONE)
result3 = lazy_pinyin('中国', style=TONE)

Install with Tessl CLI
npx tessl i tessl/pypi-pypinyin