or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

advanced-features.md · command-line-tools.md · core-functions.md · dictionary-customization.md · index.md · styles-formatting.md

docs/dictionary-customization.md

0

# Dictionary Customization

1

2

Functions for loading custom pronunciation dictionaries to override default pinyin mappings for specific characters or phrases, enabling accurate pronunciation for specialized domains, proper nouns, and regional variations.

3

4

## Capabilities

5

6

### Single Character Dictionary Loading

7

8

Load custom pronunciations for individual Chinese characters to override default mappings.

9

10

```python { .api }

11

def load_single_dict(pinyin_dict, style='default'):

12

"""

13

Load custom dictionary for single character pinyin pronunciation corrections.

14

15

Parameters:

16

- pinyin_dict (dict): Dictionary mapping characters to pinyin pronunciations

17

Format: {ord(character): 'pinyin1,pinyin2'} (keys are Unicode code points; multiple readings go in one comma-separated string, e.g. {ord('还'): 'hái,huán'})

18

- style (str): Pinyin notation used by the values in pinyin_dict; supported values are 'default' (tone marks) and 'tone2' (tone numbers) (default: 'default')

19

20

Returns:

21

None: Dictionary is loaded globally and affects all subsequent conversions

22

"""

23

```

24

25

#### Usage Examples

26

27

```python

28

from pypinyin import load_single_dict, lazy_pinyin

29

30

# Load custom pronunciations for specific characters

31

custom_dict = {

32

'朴': ['pǔ'], # Override default pronunciation

33

'任': ['rén'], # Ensure specific pronunciation for surnames

34

'华': ['huá'] # Set preferred pronunciation

35

}

36

37

# Load the custom dictionary

38

load_single_dict(custom_dict)

39

40

# Test the custom pronunciations

41

result = lazy_pinyin('朴素')

42

print(result) # Uses custom pronunciation for 朴

43

44

# Multiple pronunciation format

45

custom_dict_multi = {

46

'银': ['yín', 'yǐn'], # Multiple possible pronunciations

47

'行': ['xíng', 'háng']

48

}

49

50

load_single_dict(custom_dict_multi)

51

```

52

53

### Phrase Dictionary Loading

54

55

Load custom pronunciations for multi-character phrases to ensure accurate pronunciation through proper word segmentation.

56

57

```python { .api }

58

def load_phrases_dict(phrases_dict, style='default'):

59

"""

60

Load custom dictionary for phrase pinyin pronunciation corrections.

61

62

Parameters:

63

- phrases_dict (dict): Dictionary mapping phrases to pinyin pronunciations

64

Format: {phrase: [[pinyin_for_char1], [pinyin_for_char2], ...]}

65

- style (str): Pinyin notation used by the values in phrases_dict; supported values are 'default' (tone marks) and 'tone2' (tone numbers) (default: 'default')

66

67

Returns:

68

None: Dictionary is loaded globally and affects all subsequent conversions

69

"""

70

```

71

72

#### Usage Examples

73

74

```python

75

from pypinyin import load_phrases_dict, lazy_pinyin

76

77

# Load custom phrase pronunciations

78

phrase_dict = {

79

'重庆': [['chóng'], ['qìng']], # Proper pronunciation for city name

80

'长安': [['cháng'], ['ān']], # Historical place name

81

'银行': [['yín'], ['háng']], # Financial institution context

82

'一个': [['yí'], ['gè']], # Tone sandhi correction

83

}

84

85

# Load the phrase dictionary

86

load_phrases_dict(phrase_dict)

87

88

# Test phrase pronunciations

89

result = lazy_pinyin('重庆市')

90

print(result) # Uses custom phrase pronunciation for 重庆

91

92

result = lazy_pinyin('我在银行工作')

93

print(result) # Uses banking context pronunciation for 银行

94

```

95

96

### Dictionary Style Management

97

98

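Both dictionary loading functions accept a `style` parameter that declares the pinyin notation of the supplied entries (`'default'` for tone marks, `'tone2'` for tone numbers). Note that all loaded entries are merged into a single global dictionary; `style` does not create separate, named dictionary sets.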

99

100

```python

101

from pypinyin import load_single_dict, load_phrases_dict

102

103

# Load dictionaries with different styles for different contexts

104

medical_chars = {

105

'症': ['zhèng'], # Medical context

106

'脉': ['mài'] # Traditional medicine

107

}

108

load_single_dict(medical_chars, style='medical')

109

110

# Legal terminology phrases

111

legal_phrases = {

112

'法院': [['fǎ'], ['yuàn']],

113

'起诉': [['qǐ'], ['sù']]

114

}

115

load_phrases_dict(legal_phrases, style='legal')

116

```

117

118

## Custom Dictionary Formats

119

120

### Single Character Dictionary Format

121

122

Character dictionaries map individual characters to their pronunciations:

123

124

```python

125

# Simple string format (single pronunciation)

126

single_dict = {

127

'张': 'zhāng',

128

'李': 'lǐ',

129

'王': 'wáng'

130

}

131

132

# List format (multiple pronunciations)

133

multi_dict = {

134

'中': ['zhōng', 'zhòng'], # Different pronunciations in different contexts

135

'的': ['de', 'dí', 'dì'], # Multiple grammatical uses

136

'行': ['xíng', 'háng'] # Different meanings

137

}

138

139

# Mixed format

140

mixed_dict = {

141

'朴': 'pǔ', # Single pronunciation

142

'任': ['rén', 'rèn'], # Multiple pronunciations

143

'华': 'huá' # Single pronunciation

144

}

145

```

146

147

### Phrase Dictionary Format

148

149

Phrase dictionaries map multi-character strings to pinyin arrays:

150

151

```python

152

# Standard phrase format

153

phrase_dict = {

154

# Each phrase maps to list of lists (one list per character)

155

'北京': [['běi'], ['jīng']],

156

'上海': [['shàng'], ['hǎi']],

157

158

# Phrases with tone sandhi corrections

159

'一个': [['yí'], ['gè']], # 一 changes tone before 4th tone

160

'不用': [['bú'], ['yòng']], # 不 changes tone before 4th tone

161

162

# Proper nouns with specific pronunciations

163

'西安': [['xī'], ['ān']], # City name

164

'长城': [['cháng'], ['chéng']], # Great Wall

165

166

# Technical terms

167

'数据': [['shù'], ['jù']], # Data/statistics context

168

'银行': [['yín'], ['háng']], # Banking context

169

}

170

```

171

172

## Dictionary Loading Strategies

173

174

### Domain-Specific Dictionaries

175

176

Load different dictionaries for different application domains:

177

178

```python

179

from pypinyin import load_single_dict, load_phrases_dict, lazy_pinyin

180

181

# Medical terminology

182

def load_medical_dict():

183

medical_chars = {

184

'症': ['zhèng'], # Symptom

185

'脉': ['mài'], # Pulse

186

'药': ['yào'] # Medicine

187

}

188

189

medical_phrases = {

190

'感冒': [['gǎn'], ['mào']],

191

'发烧': [['fā'], ['shāo']],

192

'头痛': [['tóu'], ['tòng']]

193

}

194

195

load_single_dict(medical_chars, style='medical')

196

load_phrases_dict(medical_phrases, style='medical')

197

198

# Geographic names

199

def load_geographic_dict():

200

geo_phrases = {

201

'黑龙江': [['hēi'], ['lóng'], ['jiāng']],

202

'内蒙古': [['nèi'], ['měng'], ['gǔ']],

203

'新疆': [['xīn'], ['jiāng']]

204

}

205

206

load_phrases_dict(geo_phrases, style='geographic')

207

208

# Load domain-specific dictionaries

209

load_medical_dict()

210

load_geographic_dict()

211

212

# Test with domain-specific text

213

medical_text = "患者出现发烧症状"

214

result = lazy_pinyin(medical_text)

215

print(result) # Uses medical pronunciation rules

216

```

217

218

### Personal Name Handling

219

220

Special handling for Chinese personal names and surnames:

221

222

```python

223

from pypinyin import load_single_dict, load_phrases_dict, lazy_pinyin

224

225

# Common surname pronunciations

226

surname_dict = {

227

'朴': ['piáo'], # Korean-origin surname

228

'任': ['rén'], # Surname context

229

'华': ['huá'], # Given name context

230

'单': ['shàn'], # Surname (not dān)

231

'种': ['chóng'] # Surname (not zhǒng)

232

}

233

234

# Famous person names

235

famous_names = {

236

'孔子': [['kǒng'], ['zǐ']],

237

'老子': [['lǎo'], ['zǐ']],

238

'庄子': [['zhuāng'], ['zǐ']]

239

}

240

241

load_single_dict(surname_dict, style='names')

242

load_phrases_dict(famous_names, style='names')

243

244

# Test name pronunciation

245

names = ['朴志明', '任小华', '孔子']

246

for name in names:

247

result = lazy_pinyin(name)

248

print(f"{name}: {' '.join(result)}")

249

```

250

251

## Dictionary Integration Patterns

252

253

### Layered Dictionary Loading

254

255

Build comprehensive pronunciation systems by layering multiple dictionaries:

256

257

```python

258

def setup_comprehensive_dict():

259

# Base corrections for common issues

260

base_corrections = {

261

'一': ['yī', 'yí', 'yì'], # Context-dependent tone changes

262

'不': ['bù', 'bú'] # Tone sandhi variations

263

}

264

load_single_dict(base_corrections, style='base')

265

266

# Regional pronunciation preferences

267

regional_prefs = {

268

'什么': [['shén'], ['me']], # Northern pronunciation

269

'这样': [['zhè'], ['yàng']] # Standard pronunciation

270

}

271

load_phrases_dict(regional_prefs, style='regional')

272

273

# Specialized terminology

274

tech_terms = {

275

'数据库': [['shù'], ['jù'], ['kù']],

276

'算法': [['suàn'], ['fǎ']]

277

}

278

load_phrases_dict(tech_terms, style='technical')

279

280

# Initialize comprehensive dictionary system

281

setup_comprehensive_dict()

282

```

283

284

### Dynamic Dictionary Updates

285

286

Update dictionaries based on context or user preferences:

287

288

```python

289

from pypinyin import load_single_dict, load_phrases_dict, lazy_pinyin

290

291

def update_context_dict(context='general'):

292

"""Update pronunciation dictionary based on context."""

293

294

if context == 'business':

295

business_dict = {

296

'银行': [['yín'], ['háng']], # Banking context

297

'股票': [['gǔ'], ['piào']], # Stock market

298

'公司': [['gōng'], ['sī']] # Company

299

}

300

load_phrases_dict(business_dict, style='business')

301

302

elif context == 'education':

303

edu_dict = {

304

'学校': [['xué'], ['xiào']],

305

'老师': [['lǎo'], ['shī']],

306

'学生': [['xué'], ['shēng']]

307

}

308

load_phrases_dict(edu_dict, style='education')

309

310

elif context == 'travel':

311

travel_dict = {

312

'飞机': [['fēi'], ['jī']],

313

'火车': [['huǒ'], ['chē']],

314

'酒店': [['jiǔ'], ['diàn']]

315

}

316

load_phrases_dict(travel_dict, style='travel')

317

318

# Use context-specific dictionaries

319

update_context_dict('business')

320

business_text = "去银行办理股票账户"

321

result = lazy_pinyin(business_text)

322

print(' '.join(result))

323

```