Comprehensive Chinese character to Pinyin conversion library with intelligent word segmentation and multiple output styles
npx @tessl/cli install tessl/pypi-pypinyin@0.55.0
A comprehensive Chinese character to Pinyin conversion library for Python that provides intelligent word segmentation to match accurate pronunciation for multi-character phrases. It supports polyphonic characters with heteronym detection and multiple Pinyin output styles, including tone marks, tone numbers, first letters, initials/finals separation, and Bopomofo notation.
pip install pypinyin
import pypinyin
Common imports for core functionality:
from pypinyin import pinyin, lazy_pinyin, slug, Style
For style constants:
from pypinyin import (
NORMAL, TONE, TONE2, TONE3,
INITIALS, FIRST_LETTER, FINALS, FINALS_TONE, FINALS_TONE2, FINALS_TONE3,
BOPOMOFO, BOPOMOFO_FIRST, CYRILLIC, CYRILLIC_FIRST,
WADEGILES, GWOYEU, BRAILLE_MAINLAND, BRAILLE_MAINLAND_TONE
)
from pypinyin import pinyin, lazy_pinyin, slug, Style
# Basic pinyin conversion with tone marks
text = "中国"
result = pinyin(text)
print(result) # [['zhōng'], ['guó']]
# Simple pinyin without tone marks
result = lazy_pinyin(text)
print(result) # ['zhong', 'guo']
# Different output styles
result = pinyin(text, style=Style.TONE3)
print(result) # [['zhong1'], ['guo2']]
result = pinyin(text, style=Style.FIRST_LETTER)
print(result) # [['z'], ['g']]
# Generate URL-friendly slugs
slug_text = slug(text)
print(slug_text) # zhong-guo
# Handle polyphonic characters (heteronyms)
text = "银行" # can be pronounced different ways
result = pinyin(text, heteronym=True)
print(result) # [['yín'], ['háng', 'xíng']]
pypinyin is built around a modular architecture; its main components are described in the sections below.
Primary functions for converting Chinese characters to pinyin with various output options, heteronym support, and customization.
def pinyin(hans, style=Style.TONE, heteronym=False, errors='default', strict=True, v_to_u=False, neutral_tone_with_five=False): ...
def lazy_pinyin(hans, style=Style.NORMAL, errors='default', strict=True, v_to_u=False, neutral_tone_with_five=False, tone_sandhi=False): ...
def slug(hans, style=Style.NORMAL, heteronym=False, separator='-', errors='default', strict=True): ...
Comprehensive style system controlling pinyin output format, including tones, initials/finals, alternative notation systems, and specialized styles.
class Style(IntEnum):
NORMAL = 0
TONE = 1
TONE2 = 2
INITIALS = 3
FIRST_LETTER = 4
FINALS = 5
FINALS_TONE = 6
FINALS_TONE2 = 7
TONE3 = 8
FINALS_TONE3 = 9
BOPOMOFO = 10
BOPOMOFO_FIRST = 11
CYRILLIC = 12
CYRILLIC_FIRST = 13
WADEGILES = 14
GWOYEU = 15
BRAILLE_MAINLAND = 16
BRAILLE_MAINLAND_TONE = 17
Functions for loading custom pronunciation dictionaries to override default pinyin mappings for specific characters or phrases.
def load_single_dict(pinyin_dict, style='default'): ...
def load_phrases_dict(phrases_dict, style='default'): ...
Command-line tools for batch processing and format conversion.
pypinyin [options] [input_text]
python -m pypinyin.tools.toneconvert [action] [input]
Extended functionality including custom converters, tone sandhi processing, segmentation control, and specialized mixins.
class Pinyin:
def __init__(self, converter=None): ...
class DefaultConverter: ...
class UltimateConverter: ...
class PinyinNotFoundException(Exception):
"""
Raised when no pinyin pronunciation found for input characters.
Attributes:
- message (str): Exception message
- chars (str): Characters that caused the exception
"""
def __init__(self, chars):
"""Initialize exception with problematic characters."""
self.message = 'No pinyin found for character "{}"'.format(chars)
self.chars = chars
super(PinyinNotFoundException, self).__init__(self.message)
Common error handling patterns:
from pypinyin import pinyin, PinyinNotFoundException
try:
result = pinyin("some text", errors='exception')
except PinyinNotFoundException as e:
print(f"No pinyin found: {e.message}")
print(f"Problematic characters: {e.chars}")