0
# Utility Functions
1
2
Helper functions for comparing Chinese strings by Pinyin pronunciation and manipulating Pinyin result arrays for advanced use cases.
3
4
## Capabilities
5
6
### String Comparison
7
8
Compare Chinese strings based on their Pinyin pronunciation for sorting and ordering operations.
9
10
```typescript { .api }
11
/**
12
* Compare two Chinese strings by their Pinyin pronunciation
13
* @param hanA - First Chinese string to compare
14
* @param hanB - Second Chinese string to compare
15
* @returns -1 if hanA sorts before hanB, 0 if their Pinyin is equal, 1 if hanA sorts after hanB (lexicographic order of the Pinyin)
16
*/
17
function compare(hanA: string, hanB: string): number;
18
```
19
20
**Usage Examples:**
21
22
```typescript
23
import pinyin, { compare } from "pinyin";
24
25
// Basic comparison
26
console.log(compare("北京", "上海"));
27
// Result: -1 (Beijing comes before Shanghai alphabetically in Pinyin)
28
29
console.log(compare("你好", "你好"));
30
// Result: 0 (identical strings)
31
32
console.log(compare("中国", "美国"));
33
// Result: 1 (China comes after America alphabetically in Pinyin)
34
35
// Sorting Chinese strings by Pinyin
36
const cities = ["北京", "上海", "广州", "深圳", "杭州"];
37
cities.sort(compare);
38
console.log(cities);
39
// Result: ["北京", "广州", "杭州", "上海", "深圳"] (sorted by Pinyin)
40
41
// Using with Array.sort for complex data
42
const users = [
43
{ name: "王小明", age: 25 },
44
{ name: "李小红", age: 30 },
45
{ name: "陈大华", age: 28 }
46
];
47
48
users.sort((a, b) => compare(a.name, b.name));
49
console.log(users.map(u => u.name));
50
// Result: ["陈大华", "李小红", "王小明"] (chén < lǐ < wáng)
51
```
52
53
### Array Compacting
54
55
Transform multi-dimensional Pinyin arrays into all possible pronunciation combinations.
56
57
```typescript { .api }
58
/**
59
* Compact multi-dimensional Pinyin arrays into all possible combinations
60
* @param arr - Multi-dimensional array of Pinyin pronunciations
61
* @returns Array of arrays containing all possible pronunciation combinations
62
*/
63
function compact(arr: string[][]): string[][];
64
```
65
66
**Usage Examples:**
67
68
```typescript
69
import { compact } from "pinyin";
70
71
// Basic compacting
72
const pinyinResult = [["nǐ"], ["hǎo", "hào"], ["ma", "má", "mǎ"]];
73
console.log(compact(pinyinResult));
74
// Result: [
75
// ["nǐ", "hǎo", "ma"], ["nǐ", "hǎo", "má"], ["nǐ", "hǎo", "mǎ"],
76
// ["nǐ", "hào", "ma"], ["nǐ", "hào", "má"], ["nǐ", "hào", "mǎ"]
77
// ]
78
79
// Single pronunciation per character
80
const simple = [["zhōng"], ["xīn"]];
81
console.log(compact(simple));
82
// Result: [["zhōng", "xīn"]]
83
84
// Empty or single arrays
85
console.log(compact([]));
86
// Result: []
87
88
console.log(compact([["hello"]]));
89
// Result: [["hello"]]
90
```
91
92
### Integrated Compact Option
93
94
The compact functionality is also available directly through the main pinyin function:
95
96
```typescript
97
import pinyin from "pinyin";
98
99
// Using compact option in main function
100
console.log(pinyin("你好吗", {
101
heteronym: true,
102
compact: true
103
}));
104
// Same result as: compact(pinyin("你好吗", { heteronym: true }))
105
```
106
107
## Advanced Usage Patterns
108
109
### Chinese Text Sorting
110
111
Create robust sorting functions for Chinese text data:
112
113
```typescript
114
import { compare } from "pinyin";
115
116
// Sort function for Chinese names
117
function sortChineseNames(names: string[]): string[] {
118
return names.slice().sort(compare);
119
}
120
121
// Sort objects by Chinese property
122
function sortByChineseName<T extends { name: string }>(items: T[]): T[] {
123
return items.slice().sort((a, b) => compare(a.name, b.name));
124
}
125
126
// Usage
127
const names = ["张三", "李四", "王五", "赵六"];
128
console.log(sortChineseNames(names));
129
130
const products = [
131
{ name: "苹果", price: 5 },
132
{ name: "香蕉", price: 3 },
133
{ name: "橙子", price: 4 }
134
];
135
console.log(sortByChineseName(products));
136
```
137
138
### Pronunciation Combination Analysis
139
140
Analyze all possible pronunciations for ambiguous text:
141
142
```typescript
143
import pinyin, { compact } from "pinyin";
144
145
function getAllPronunciations(text: string): string[][] {
146
const pinyinResult = pinyin(text, { heteronym: true });
147
return compact(pinyinResult);
148
}
149
150
// Find all ways to pronounce ambiguous text
151
const combinations = getAllPronunciations("中行");
152
console.log(combinations);
153
// Result: [["zhōng", "háng"], ["zhōng", "xíng"], ["zhòng", "háng"], ["zhòng", "xíng"]]
154
155
// Count pronunciation variations
156
function countPronunciations(text: string): number {
157
return getAllPronunciations(text).length;
158
}
159
160
console.log(countPronunciations("中行")); // 4 combinations
161
console.log(countPronunciations("你好")); // 2 combinations (好 has two readings: hǎo, hào)
162
```
163
164
### Search and Indexing
165
166
Use utility functions for search functionality:
167
168
```typescript
169
import pinyin, { compare } from "pinyin";
170
171
// Create search index with Pinyin
172
function createSearchIndex(texts: string[]) {
173
return texts.map(text => ({
174
original: text,
175
pinyin: pinyin(text, { style: "normal" }).flat().join(""),
176
pinyinTones: pinyin(text).flat().join("")
177
}));
178
}
179
180
// Binary search in sorted Chinese text
181
function binarySearchChinese(sortedArray: string[], target: string): number {
182
let left = 0;
183
let right = sortedArray.length - 1;
184
185
while (left <= right) {
186
const mid = Math.floor((left + right) / 2);
187
const comparison = compare(sortedArray[mid], target);
188
189
if (comparison === 0) return mid;
190
if (comparison < 0) left = mid + 1;
191
else right = mid - 1;
192
}
193
194
return -1;
195
}
196
197
// Usage
198
const sortedCities = ["北京", "广州", "杭州", "上海", "深圳"];
199
console.log(binarySearchChinese(sortedCities, "杭州")); // 2 (index of "杭州"); -1 when not found
200
```
201
202
## Performance Considerations
203
204
### Comparison Function Performance
205
206
The `compare` function converts both strings to Pinyin on every call, so repeated comparisons (for example, inside a sort) repeat the conversion:
207
208
```typescript
209
// Efficient for one-time comparisons
210
compare("北京", "上海");
211
212
// For sorting large arrays, consider pre-computing Pinyin
213
const items = ["北京", "上海", "广州", "深圳"];
214
215
// Less efficient - computes Pinyin repeatedly
216
items.sort(compare);
217
218
// More efficient for large datasets - pre-compute Pinyin
219
const itemsWithPinyin = items.map(item => ({
220
original: item,
221
pinyin: pinyin(item).flat().join("")
222
}));
223
224
itemsWithPinyin.sort((a, b) => a.pinyin.localeCompare(b.pinyin));
225
const sorted = itemsWithPinyin.map(item => item.original);
226
```
227
228
### Compact Function Complexity
229
230
The `compact` function generates every combination, so the result size is the product of the number of options at each position and can grow exponentially:
231
232
```typescript
233
// Small number of combinations
234
compact([["a"], ["b", "c"]]); // 2 combinations
235
236
// Large number of combinations - use carefully
237
compact([["a", "b"], ["c", "d"], ["e", "f"], ["g", "h"]]); // 16 combinations
238
239
// Very large - consider memory impact
240
const manyOptions = Array(5).fill(["a", "b", "c", "d"]);
241
compact(manyOptions); // 1024 combinations!
242
```
243
244
For performance-critical applications with many polyphonic characters, consider limiting the scope of the input or setting `heteronym: false` to avoid exponential growth.