Tessl Tile for pypi/pypinyin@0.55.0

or run

npx @tessl/cli init

Version

Tile

Overview

Evals

Files

docs

advanced-features.md command-line-tools.md core-functions.md dictionary-customization.md index.md styles-formatting.md

advanced-features.md · docs/

0
# Advanced Features
1

2
Extended functionality including custom converters, tone sandhi processing, segmentation control, and specialized mixins for advanced pinyin processing scenarios.
3

4
## Capabilities
5

6
### Core Pinyin Class
7

8
The main Pinyin class provides configurable converter backends for advanced customization.
9

10
```python { .api }
11
class Pinyin:
12
    """Main pinyin conversion class with configurable converter backend."""
13
    
14
    def __init__(self, converter=None):
15
        """
16
        Initialize Pinyin converter.
17
        
18
        Parameters:
19
        - converter: Custom converter instance (default: DefaultConverter)
20
        """
21
    
22
    def pinyin(self, hans, style=Style.TONE, heteronym=False, errors='default', strict=True):
23
        """Convert Chinese characters to pinyin using configured converter."""
24
    
25
    def lazy_pinyin(self, hans, style=Style.NORMAL, errors='default', strict=True):
26
        """Convert Chinese characters to pinyin (lazy mode) using configured converter."""
27
```
28

29
#### Usage Examples
30

31
```python
32
from pypinyin.core import Pinyin
33
from pypinyin.converter import DefaultConverter, UltimateConverter
34
from pypinyin import Style
35

36
# Use default converter
37
pinyin_converter = Pinyin()
38
result = pinyin_converter.pinyin('中国')
39
print(result)  # [['zhōng'], ['guó']]
40

41
# Use advanced converter
42
ultimate_converter = UltimateConverter()
43
pinyin_converter = Pinyin(converter=ultimate_converter)
44
result = pinyin_converter.pinyin('中国')
45
print(result)  # Enhanced conversion with ultimate converter
46

47
# Custom converter configuration
48
custom_converter = DefaultConverter()
49
# Configure custom converter settings...
50
pinyin_converter = Pinyin(converter=custom_converter)
51
```
52

53
### Converter Classes
54

55
Pluggable converter implementations providing different processing backends.
56

57
#### DefaultConverter
58

59
```python { .api }
60
class DefaultConverter:
61
    """Basic pinyin converter implementation."""
62
    
63
    def __init__(self):
64
        """Initialize default converter with standard settings."""
65
    
66
    def convert(self, han, style, errors, strict):
67
        """
68
        Convert single character to pinyin.
69
        
70
        Parameters:
71
        - han (str): Chinese character to convert
72
        - style (Style): Output style
73
        - errors (str): Error handling strategy
74
        - strict (bool): Strict mode
75
        
76
        Returns:
77
        list: Pinyin pronunciations for the character
78
        """
79
```
80

81
#### UltimateConverter
82

83
```python { .api }
84
class UltimateConverter:
85
    """Enhanced converter with advanced processing options."""
86
    
87
    def __init__(self):
88
        """Initialize ultimate converter with enhanced features."""
89
    
90
    def convert(self, han, style, errors, strict):
91
        """Convert single character with enhanced processing."""
92
```
93

94
#### Usage Examples
95

96
```python
97
from pypinyin.converter import DefaultConverter, UltimateConverter
98
from pypinyin.core import Pinyin
99
from pypinyin import Style
100

101
# Compare converter outputs
102
text = '重庆'
103

104
# Default converter
105
default_conv = DefaultConverter()
106
pinyin_default = Pinyin(converter=default_conv)
107
result1 = pinyin_default.pinyin(text)
108
print(f"Default: {result1}")
109

110
# Ultimate converter  
111
ultimate_conv = UltimateConverter()
112
pinyin_ultimate = Pinyin(converter=ultimate_conv)
113
result2 = pinyin_ultimate.pinyin(text)
114
print(f"Ultimate: {result2}")
115

116
# Custom converter subclass
117
class CustomConverter(DefaultConverter):
118
    def convert(self, han, style, errors, strict):
119
        # Custom processing logic
120
        result = super().convert(han, style, errors, strict)
121
        # Post-process result...
122
        return result
123

124
custom_conv = CustomConverter() 
125
pinyin_custom = Pinyin(converter=custom_conv)
126
result3 = pinyin_custom.pinyin(text)
127
print(f"Custom: {result3}")
128
```
129

130
### Contrib Modules - Advanced Processing
131

132
Extended processing capabilities through contrib mixins and modules.
133

134
#### Tone Sandhi Processing
135

136
```python { .api }
137
# pypinyin.contrib.tone_sandhi
138
class ToneSandhiMixin:
139
    """Mixin providing tone sandhi rule processing."""
140
    
141
    def pre_handle_tone_sandhi(self, han_list):
142
        """Apply tone sandhi rules to character sequence."""
143
```
144

145
Tone sandhi automatically applies tone change rules for natural pronunciation:
146

147
```python
148
from pypinyin.contrib.tone_sandhi import ToneSandhiMixin
149
from pypinyin import lazy_pinyin
150

151
# Enable tone sandhi in lazy_pinyin
152
result = lazy_pinyin('一个', tone_sandhi=True)
153
print(result)  # ['yi', 'ge'] with the default Style.NORMAL; pass style=Style.TONE to get ['yí', 'gè'] (一 changes from tone 1 to tone 2)
154

155
result = lazy_pinyin('不用', tone_sandhi=True)
156
print(result)  # ['bu', 'yong'] with the default Style.NORMAL; pass style=Style.TONE to get ['bú', 'yòng'] (不 changes from tone 4 to tone 2)
157

158
# Common tone sandhi patterns
159
examples = [
160
    ('一天', ['yì', 'tiān']),      # 一 + 1st tone -> 4th tone
161
    ('一个', ['yí', 'gè']),        # 一 + 4th tone -> 2nd tone  
162
    ('一些', ['yì', 'xiē']),       # 一 + 1st tone -> 4th tone
163
    ('不对', ['bú', 'duì']),       # 不 + 4th tone -> 2nd tone
164
    ('不好', ['bù', 'hǎo']),       # 不 + 3rd tone -> unchanged, stays 4th tone (no sandhi)
165
]
166

167
for text, expected in examples:
168
    result = lazy_pinyin(text, tone_sandhi=True)
169
    print(f"{text}: {result}")
170
```
171

172
#### Character Variant Handling
173

174
```python { .api }
175
# pypinyin.contrib.uv
176
class V2UMixin:
177
    """Mixin handling v/ü character conversion."""
178
    
179
    def pre_handle_v_to_u(self, han_list):
180
        """Convert 'v' characters to 'ü' in output."""
181
```
182

183
```python
184
from pypinyin import lazy_pinyin, Style
185

186
# Standard output with 'v'
187
result = lazy_pinyin('女', style=Style.TONE2)  
188
print(result)  # ['nv3']
189

190
# Convert 'v' to 'ü'
191
result = lazy_pinyin('女', style=Style.TONE2, v_to_u=True)
192
print(result)  # ['nü3']
193

194
# Works with different styles
195
result = lazy_pinyin('绿', style=Style.NORMAL, v_to_u=True)
196
print(result)  # ['lü'] instead of ['lv']
197
```
198

199
#### Neutral Tone Handling
200

201
```python { .api }
202
# pypinyin.contrib.neutral_tone
203
class NeutralToneWith5Mixin:
204
    """Mixin for neutral tone handling with number 5."""
205
    
206
    def pre_handle_neutral_tone_with_5(self, han_list):
207
        """Use '5' for neutral tone in numeric styles."""
208
```
209

210
```python
211
from pypinyin import lazy_pinyin, Style
212

213
# Standard neutral tone representation
214
result = lazy_pinyin('的', style=Style.TONE3)
215
print(result)  # ['de'] (no tone number for neutral tone)
216

217
# Use '5' for neutral tone
218
result = lazy_pinyin('的', style=Style.TONE3, neutral_tone_with_five=True)
219
print(result)  # ['de5']
220

221
# Examples with neutral tone particles
222
particles = ['的', '了', '着', '过']
223
for particle in particles:
224
    standard = lazy_pinyin(particle, style=Style.TONE3)
225
    with_five = lazy_pinyin(particle, style=Style.TONE3, neutral_tone_with_five=True)
226
    print(f"{particle}: {standard} -> {with_five}")
227
```
228

229
### Segmentation Modules
230

231
Word boundary detection modules for accurate pronunciation through proper segmentation.
232

233
#### MMSeg Segmentation
234

235
```python { .api }
236
# pypinyin.seg.mmseg
237
def seg(hans):
238
    """
239
    Segment Chinese text using MMSeg algorithm.
240
    
241
    Parameters:
242
    - hans (str): Chinese text to segment
243
    
244
    Returns:
245
    list: List of segmented words
246
    """
247
```
248

249
```python
250
from pypinyin.seg.mmseg import seg
251
from pypinyin import lazy_pinyin
252

253
# Compare with and without segmentation
254
text = '研究生命的起源'
255

256
# Without proper segmentation (character by character)
257
result1 = lazy_pinyin(text)
258
print(f"Character-by-character: {result1}")
259

260
# With MMSeg segmentation
261
segments = seg(text)
262
print(f"Segments: {segments}")  # Better word boundaries
263

264
# Apply segmentation for better pronunciation
265
segmented_text = ' '.join(segments)
266
result2 = lazy_pinyin(segmented_text)
267
print(f"Segmented: {result2}")
268
```
269

270
#### Simple Segmentation
271

272
```python { .api }
273
# pypinyin.seg.simpleseg  
274
def seg(hans):
275
    """
276
    Simple character-by-character segmentation.
277
    
278
    Parameters:
279
    - hans (str): Chinese text to segment
280
    
281
    Returns:
282
    list: List of individual characters
283
    """
284
```
285

286
```python
287
from pypinyin.seg.simpleseg import seg
288

289
text = '中华人民共和国'
290
segments = seg(text)
291
print(segments)  # ['中', '华', '人', '民', '共', '和', '国']
292
```
293

294
### Tone Conversion Utilities
295

296
Direct tone style conversion functions for format transformation.
297

298
```python { .api }
299
# pypinyin.contrib.tone_convert
300
def tone_to_tone2(tone_pinyin):
301
    """Convert tone marks to tone2 format."""
302

303
def tone2_to_tone(tone2_pinyin):
304
    """Convert tone2 format to tone marks."""
305

306
def tone_to_tone3(tone_pinyin):
307
    """Convert tone marks to tone3 format."""
308

309
def tone3_to_tone(tone3_pinyin):
310
    """Convert tone3 format to tone marks."""
311

312
# Additional conversion functions for all style pairs...
313
```
314

315
#### Usage Examples
316

317
```python
318
from pypinyin.contrib.tone_convert import (
319
    tone_to_tone2, tone2_to_tone, 
320
    tone_to_tone3, tone3_to_tone
321
)
322

323
# Convert between tone formats
324
original = 'zhōng guó'
325

326
# To tone2 (numbers after vowels)
327
tone2_result = tone_to_tone2(original)
328
print(f"Tone2: {tone2_result}")  # zho1ng guo2
329

330
# To tone3 (numbers after pinyin)
331
tone3_result = tone_to_tone3(original)
332
print(f"Tone3: {tone3_result}")  # zhong1 guo2
333

334
# Back to tone marks
335
back_to_tone = tone3_to_tone(tone3_result)
336
print(f"Back to tone: {back_to_tone}")  # zhōng guó
337

338
# Chain conversions
339
conversion_chain = [
340
    ('Original', 'zhōng guó'),
341
    ('Tone2', tone_to_tone2('zhōng guó')),
342
    ('Tone3', tone_to_tone3('zhōng guó')),
343
    ('Back', tone3_to_tone(tone_to_tone3('zhōng guó')))
344
]
345

346
for label, result in conversion_chain:
347
    print(f"{label}: {result}")
348
```
349

350
## Advanced Integration Patterns
351

352
### Custom Converter Development
353

354
Creating specialized converters for domain-specific needs:
355

356
```python
357
from pypinyin.converter import DefaultConverter
358
from pypinyin.core import Pinyin
359
from pypinyin import Style
360

361
class DomainSpecificConverter(DefaultConverter):
362
    """Custom converter for domain-specific pronunciation."""
363
    
364
    def __init__(self, domain='general'):
365
        super().__init__()
366
        self.domain = domain
367
        self.domain_dict = self._load_domain_dict()
368
    
369
    def _load_domain_dict(self):
370
        """Load domain-specific pronunciation mappings."""
371
        domain_mappings = {
372
            'medical': {
373
                '症': ['zhèng'],  # Medical symptom context
374
                '脉': ['mài'],    # Pulse context
375
            },
376
            'legal': {
377
                '法': ['fǎ'],     # Law context
378
                '案': ['àn'],     # Legal case context
379
            }
380
        }
381
        return domain_mappings.get(self.domain, {})
382
    
383
    def convert(self, han, style, errors, strict):
384
        """Convert with domain-specific rules."""
385
        # Check domain dictionary first
386
        if han in self.domain_dict:
387
            domain_pronunciations = self.domain_dict[han]
388
            # Format according to requested style...
389
            return domain_pronunciations
390
        
391
        # Fall back to default conversion
392
        return super().convert(han, style, errors, strict)
393

394
# Use custom converter
395
medical_converter = DomainSpecificConverter(domain='medical')
396
medical_pinyin = Pinyin(converter=medical_converter)
397

398
medical_text = '症状分析'
399
result = medical_pinyin.pinyin(medical_text)
400
print(f"Medical context: {result}")
401
```
402

403
### Combining Advanced Features
404

405
Integrating multiple advanced features for comprehensive processing:
406

407
```python
408
from pypinyin import lazy_pinyin, Style
409
from pypinyin.seg.mmseg import seg as mmseg_seg
410
from pypinyin.contrib.tone_convert import tone_to_tone3
411

412
def advanced_processing_pipeline(text):
413
    """Comprehensive processing with multiple advanced features."""
414
    
415
    # Step 1: Intelligent segmentation
416
    segments = mmseg_seg(text)
417
    print(f"Segments: {segments}")
418
    
419
    # Step 2: Pinyin conversion with tone sandhi
420
    pinyin_result = lazy_pinyin(
421
        text, 
422
        style=Style.TONE,
423
        tone_sandhi=True,
424
        v_to_u=True,
425
        neutral_tone_with_five=True
426
    )
427
    print(f"Pinyin with advanced features: {pinyin_result}")
428
    
429
    # Step 3: Format conversion
430
    tone_marked = ' '.join(pinyin_result)
431
    tone3_format = tone_to_tone3(tone_marked)
432
    print(f"Tone3 format: {tone3_format}")
433
    
434
    return {
435
        'segments': segments,
436
        'pinyin_advanced': pinyin_result,
437
        'tone3_format': tone3_format
438
    }
439

440
# Example usage
441
text = '一个不错的研究生'
442
results = advanced_processing_pipeline(text)
443

444
# Access different processing results
445
for key, value in results.items():
446
    print(f"{key}: {value}")
447
```
448

449
### Performance Optimization
450

451
Optimizing advanced feature usage for production scenarios:
452

453
```python
454
from functools import lru_cache
455
from pypinyin.core import Pinyin
456
from pypinyin.converter import DefaultConverter
457

458
class OptimizedConverter(DefaultConverter):
459
    """Performance-optimized converter with caching."""
460
    
461
    def __init__(self, cache_size=1000):
462
        super().__init__()
463
        self.cache_size = cache_size
464
        # Use LRU cache for frequent conversions
465
        self.convert = lru_cache(maxsize=cache_size)(self.convert)
466
    
467
    @lru_cache(maxsize=1000)
468
    def convert_cached(self, han, style, errors, strict):
469
        """Cached conversion for performance."""
470
        return super().convert(han, style, errors, strict)
471

472
# Batch processing with optimized converter
473
def batch_process_optimized(texts):
474
    """Process multiple texts with performance optimization."""
475
    optimized_converter = OptimizedConverter(cache_size=5000)
476
    pinyin_processor = Pinyin(converter=optimized_converter)
477
    
478
    results = []
479
    for text in texts:
480
        result = pinyin_processor.lazy_pinyin(text)
481
        results.append(result)
482
    
483
    return results
484

485
# Example with large dataset
486
large_dataset = ['中国', '美国', '英国'] * 1000  # Repeated texts
487
results = batch_process_optimized(large_dataset)
488
print(f"Processed {len(results)} texts efficiently")
489
```

Version

Tile

Files

advanced-features.md · docs/

advanced-features.md · docs/