This document covers language-sensitive string comparison and sorting based on the Unicode Collation Algorithm (UCA).
Import path: golang.org/x/text/collate
Package collate provides types for comparing and sorting Unicode strings according to a given collation order. It implements the Unicode Collation Algorithm (UCA).
// Collator provides functionality for comparing strings for a given
// collation order.
type Collator struct{}

// Constructors

// New returns a *Collator for the language tag t, configured by the
// options o.
func New(t language.Tag, o ...Option) *Collator

// NewFromTable returns a *Collator that uses the table w (for example the
// colltab.Weighter returned by build.Builder.Build), configured by the
// options o.
func NewFromTable(w colltab.Weighter, o ...Option) *Collator

// Compare returns an integer comparing two byte slices
// Result: 0 if a==b, -1 if a < b, +1 if a > b
func (c *Collator) Compare(a, b []byte) int

// CompareString returns an integer comparing two strings
// Result: 0 if a==b, -1 if a < b, +1 if a > b
func (c *Collator) CompareString(a, b string) int

// Key returns the collation key for str.
// The generated key is held by buf.
func (c *Collator) Key(buf *Buffer, str []byte) []byte

// KeyFromString returns the collation key for str.
// It is the string counterpart of Key; the generated key is held by buf.
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte

// Sort uses sort.Sort to sort the strings represented by x
func (c *Collator) Sort(x Lister)

// SortStrings uses sort.Sort to sort the strings in x
func (c *Collator) SortStrings(x []string)

// Buffer holds keys generated by Key and KeyFromString
type Buffer struct{}

// Reset presumably discards the keys currently held by b so that b can be
// reused — confirm the exact effect on previously returned keys against the
// package documentation.
func (b *Buffer) Reset()

// Lister can be sorted by Collator's Sort method
type Lister interface {
	// Len and Swap have the same signatures as the corresponding
	// sort.Interface methods.
	Len() int
	Swap(i, j int)
	// Bytes returns the text of element i as a byte slice.
	Bytes(i int) []byte
}

// Option is used to change the behavior of a Collator
type Option func(*Collator)

// IgnoreCase sets case-insensitive comparison
var IgnoreCase Option

// IgnoreDiacritics causes diacritical marks to be ignored ("o" == "ö")
var IgnoreDiacritics Option

// IgnoreWidth causes full-width characters to match their half-width equivalents
var IgnoreWidth Option

// Loose sets the collator to ignore diacritics, case and width
// (the combination of IgnoreDiacritics, IgnoreCase and IgnoreWidth).
var Loose Option

// Force enables ordering if strings are equivalent but not equal
var Force Option

// Numeric specifies that numbers should sort numerically ("2" < "12")
var Numeric Option

// OptionsFromTag extracts BCP47 collation options from the tag
func OptionsFromTag(t language.Tag) Option

// Reorder overrides the pre-defined ordering of scripts and character sets
// NOTE(review): confirm that Reorder is implemented in the x/text release in
// use before relying on it.
func Reorder(s ...string) Option

// Supported returns the list of languages for which collating differs from parent
func Supported() []language.Tagconst CLDRVersion string = "23"
const UnicodeVersion string = "6.2.0"import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// Create a collator for a specific language
col := collate.New(language.English)

// Compare strings: CompareString returns -1, 0 or +1
result := col.CompareString("apple", "banana") // -1 (apple < banana)
result = col.CompareString("apple", "apple") // 0 (equal)
result = col.CompareString("banana", "apple") // 1 (banana > apple)

// Sort strings (the slice itself is reordered)
words := []string{"zebra", "apple", "mango", "banana"}
col.SortStrings(words)
// words is now: ["apple", "banana", "mango", "zebra"]

// Case-insensitive comparison
col = collate.New(language.English, collate.IgnoreCase)
result = col.CompareString("Apple", "apple") // 0 (equal)

// Ignore diacritics
col = collate.New(language.English, collate.IgnoreDiacritics)
result = col.CompareString("cafe", "café") // 0 (equal)

// Loose comparison (ignore case, diacritics, and width)
col = collate.New(language.English, collate.Loose)
result = col.CompareString("Café", "cafe") // 0 (equal)

// Numeric sorting: digit sequences compare by numeric value
col = collate.New(language.English, collate.Numeric)
result = col.CompareString("file2.txt", "file10.txt") // -1 (2 < 10)
// Generate collation keys for efficient repeated comparisons.
// A key returned by KeyFromString points into buf and is only valid until the
// next call to buf.Reset(). Both keys are therefore generated before any
// Reset; resetting between the two calls would let key2 overwrite key1.
var buf collate.Buffer
key1 := col.KeyFromString(&buf, "apple")
key2 := col.KeyFromString(&buf, "banana")
// Compare keys with bytes.Compare
import "bytes"
result = bytes.Compare(key1, key2) // -1
// Release the key storage only once the keys are no longer needed
buf.Reset()
// Language-specific sorting (German)
germanCol := collate.New(language.German)
germanWords := []string{"Öffnen", "Zebra", "Apfel"}
germanCol.SortStrings(germanWords)
// Expected order: Apfel, Öffnen, Zebra — confirm by running the example.

// Extract options from language tag
tag := language.Make("de-u-co-phonebk") // German with phonebook ordering
col = collate.New(tag, collate.OptionsFromTag(tag))

// Multiple options: every option passed to New is applied to the collator
col = collate.New(
	language.English,
	collate.IgnoreCase,
	collate.Numeric,
)

// Custom script ordering
// NOTE(review): confirm that Reorder is implemented in the x/text release in
// use before copying this example.
col = collate.New(
	language.Und,
	collate.Reorder("latn", "cyrl", "grek"),
)

Import path: golang.org/x/text/collate/build
Package build provides functionality for building custom collation tables.
// Builder builds a root collation table
type Builder struct{}

// NewBuilder returns a new *Builder.
func NewBuilder() *Builder

// Add adds an entry to the collation element table
// Collation element format: []int{primary, secondary, tertiary, ...}
// colelems holds one such weight list per collation element of the entry.
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error

// Build builds the root Collator
// The result is a colltab.Weighter, which collate.NewFromTable accepts.
func (b *Builder) Build() (colltab.Weighter, error)

// Print prints the tables as a Go file
// It writes to w and returns the byte count n together with any error.
func (b *Builder) Print(w io.Writer) (n int, err error)

// Tailoring returns a Tailoring for the given locale
func (b *Builder) Tailoring(loc language.Tag) *Tailoring

// Tailoring builds a collation table based on another collation table
type Tailoring struct{}

// Build builds a Collator for this Tailoring
// The result is a colltab.Weighter, which collate.NewFromTable accepts.
func (t *Tailoring) Build() (colltab.Weighter, error)

// SetAnchor sets the point after which subsequent Insert calls will insert
func (t *Tailoring) SetAnchor(anchor string) error

// SetAnchorBefore sets the point before which subsequent Insert calls will insert
func (t *Tailoring) SetAnchorBefore(anchor string) error
// Insert sets the ordering of str relative to the anchor
func (t *Tailoring) Insert(level colltab.Level, str, extend string) errorimport (
"golang.org/x/text/collate/build"
"golang.org/x/text/language"
)
// Create a custom collation table
builder := build.NewBuilder()

// Add collation elements
// Primary weights determine base character ordering
// Secondary weights determine diacritics
// Tertiary weights determine case
// Each Add call below maps one rune to a single collation element; 'a'
// receives primary weight 100 and 'b' primary weight 200.
// NOTE: err is assigned by every call in this example but never inspected;
// real code should check it after each call.
err := builder.Add(
	[]rune{'a'},
	[][]int{{100, 5, 5}}, // primary=100, secondary=5, tertiary=5
	nil,
)
err = builder.Add(
	[]rune{'b'},
	[][]int{{200, 5, 5}},
	nil,
)

// Build the collation table
weighter, err := builder.Build()

// Create a collator from the custom table
// (the import is shown inline only to indicate the extra package this step needs)
import "golang.org/x/text/collate"
col := collate.NewFromTable(weighter)
// Create a tailoring for a specific language
tailoring := builder.Tailoring(language.Spanish)

// Set anchor point for insertions
// NOTE: err is assigned by each call below but never inspected; real code
// should check it after each call.
err = tailoring.SetAnchor("n")

// Insert new ordering rules
// ñ should come after n
// NOTE(review): no import block in this example brings colltab into scope;
// confirm the import path that provides colltab.Primary.
err = tailoring.Insert(colltab.Primary, "ñ", "")

// Build the tailored collator
spanishWeighter, err := tailoring.Build()
spanishCol := collate.NewFromTable(spanishWeighter)

// Imports used by the sortStrings example that follows.
import (
	"golang.org/x/text/collate"
	"golang.org/x/text/language"
)
// sortStrings orders strings with the collator for lang and hands the same
// slice back; the caller's slice is reordered.
func sortStrings(strings []string, lang language.Tag) []string {
	collate.New(lang).SortStrings(strings)
	return strings
}
// Example usage
// sortStrings reorders words itself, so the returned slice can be ignored.
words := []string{"zebra", "apple", "mango"}
sortStrings(words, language.English)

// Imports used by the PersonList example that follows.
import (
	"golang.org/x/text/collate"
	"golang.org/x/text/language"
)
// Person is a record that PersonList orders by Name.
type Person struct {
	Name string
	Age  int
}

// PersonList exposes a slice of Person through the Len/Swap/Bytes methods
// required by collate.Lister. The collator field is stored alongside the
// slice but is not read by any of these methods.
type PersonList struct {
	persons  []Person
	collator *collate.Collator
}

// Len reports how many persons the list holds.
func (l *PersonList) Len() int { return len(l.persons) }

// Swap exchanges the persons at positions i and j.
func (l *PersonList) Swap(i, j int) {
	ps := l.persons
	ps[i], ps[j] = ps[j], ps[i]
}

// Bytes returns the Name of the person at position i as a byte slice.
func (l *PersonList) Bytes(i int) []byte {
	name := l.persons[i].Name
	return []byte(name)
}
func sortPersonsByName(persons []Person, lang language.Tag) []Person {
col := collate.New(lang)
list := &PersonList{
persons: persons,
collator: col,
}
col.Sort(list)
return persons
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// equalIgnoreCase reports whether a and b compare as equal under a
// language-neutral (language.Und) collator created with collate.IgnoreCase.
// A new collator is built on every call.
func equalIgnoreCase(a, b string) bool {
	caseless := collate.New(language.Und, collate.IgnoreCase)
	order := caseless.CompareString(a, b)
	return order == 0
}
func lessIgnoreCase(a, b string) bool {
col := collate.New(language.Und, collate.IgnoreCase)
return col.CompareString(a, b) < 0
}import (
"bytes"
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// SortableString pairs a string with the collation key generated for it.
type SortableString struct {
	Original string
	Key      []byte
}

// prepareSortableStrings generates one collation key per input string with
// col and returns the strings together with their keys, in input order.
func prepareSortableStrings(strings []string, col *collate.Collator) []SortableString {
	var buf collate.Buffer
	out := make([]SortableString, len(strings))
	for i := range strings {
		raw := col.KeyFromString(&buf, strings[i])
		// Keep an independent copy of the key: buf is reset at the end of
		// every iteration and reused for the next string.
		owned := make([]byte, len(raw))
		copy(owned, raw)
		out[i].Original = strings[i]
		out[i].Key = owned
		buf.Reset()
	}
	return out
}
// sortByKeys reorders sortable in place so that the Key fields are in
// ascending bytes.Compare order.
func sortByKeys(sortable []SortableString) {
	keyLess := func(a, b int) bool {
		return bytes.Compare(sortable[a].Key, sortable[b].Key) < 0
	}
	sort.Slice(sortable, keyLess)
}
// Complete example
func efficientSort(strings []string, lang language.Tag) []string {
col := collate.New(lang)
// Generate keys once
sortable := prepareSortableStrings(strings, col)
// Sort by keys (efficient)
sortByKeys(sortable)
// Extract sorted strings
result := make([]string, len(sortable))
for i, s := range sortable {
result[i] = s.Original
}
return result
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// sortNatural orders strings with a language-neutral (language.Und) collator
// created with collate.Numeric and hands the same slice back; the caller's
// slice is reordered.
func sortNatural(strings []string) []string {
	collate.New(language.Und, collate.Numeric).SortStrings(strings)
	return strings
}
// Example: sorts ["file1.txt", "file10.txt", "file2.txt"]
// as: ["file1.txt", "file2.txt", "file10.txt"]

import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// contains reports whether any element of slice compares as equal to target
// under the collator for lang created with collate.IgnoreCase and
// collate.IgnoreDiacritics.
func contains(slice []string, target string, lang language.Tag) bool {
	col := collate.New(lang, collate.IgnoreCase, collate.IgnoreDiacritics)
	for i := range slice {
		if col.CompareString(slice[i], target) != 0 {
			continue
		}
		return true
	}
	return false
}
// Find index of string in slice
func indexOf(slice []string, target string, lang language.Tag) int {
col := collate.New(lang, collate.IgnoreCase)
for i, s := range slice {
if col.CompareString(s, target) == 0 {
return i
}
}
return -1
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
"unicode/utf8"
)
func groupByFirstLetter(strings []string, lang language.Tag) map[rune][]string {
col := collate.New(lang)
// Sort first
col.SortStrings(strings)
// Group by first letter
groups := make(map[rune][]string)
for _, s := range strings {
if len(s) == 0 {
continue
}
first, _ := utf8.DecodeRuneInString(s)
groups[first] = append(groups[first], s)
}
return groups
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
"sort"
)
// Binary search in a collation-sorted slice
func binarySearch(sorted []string, target string, lang language.Tag) int {
col := collate.New(lang)
i := sort.Search(len(sorted), func(i int) bool {
return col.CompareString(sorted[i], target) >= 0
})
if i < len(sorted) && col.CompareString(sorted[i], target) == 0 {
return i
}
return -1 // Not found
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// SortedMap stores values by string key and additionally keeps the keys in a
// slice that is sorted with collator.
type SortedMap struct {
	keys     []string
	values   map[string]interface{}
	collator *collate.Collator
}

// NewSortedMap returns an empty SortedMap whose keys are ordered by the
// collator for lang.
func NewSortedMap(lang language.Tag) *SortedMap {
	m := new(SortedMap)
	m.keys = []string{}
	m.values = make(map[string]interface{})
	m.collator = collate.New(lang)
	return m
}
// Set stores value under key. A key that is not yet present is appended to
// the key slice, which is then re-sorted with the map's collator; an existing
// key only has its value replaced.
func (m *SortedMap) Set(key string, value interface{}) {
	if _, known := m.values[key]; known {
		m.values[key] = value
		return
	}
	m.keys = append(m.keys, key)
	m.collator.SortStrings(m.keys)
	m.values[key] = value
}
// Get looks key up in the value map. The boolean result reports whether the
// key is present; when it is not, the returned value is nil.
func (m *SortedMap) Get(key string) (interface{}, bool) {
	stored, found := m.values[key]
	return stored, found
}
func (m *SortedMap) Keys() []string {
return m.keys
}import (
"golang.org/x/text/collate"
"golang.org/x/text/language"
)
// CompareOptions selects the collation behaviour used by compare: each
// boolean switches on the collate option of the same name, and Language is
// the tag the collator is created for.
type CompareOptions struct {
	IgnoreCase       bool
	IgnoreDiacritics bool
	IgnoreWidth      bool
	Numeric          bool
	Language         language.Tag
}

// compare builds a collator for opts.Language with the options enabled in
// opts and returns the result of CompareString(a, b).
func compare(a, b string, opts CompareOptions) int {
	// Flag/option pairs, listed in the order the options are passed to New.
	switches := []struct {
		enabled bool
		option  collate.Option
	}{
		{opts.IgnoreCase, collate.IgnoreCase},
		{opts.IgnoreDiacritics, collate.IgnoreDiacritics},
		{opts.IgnoreWidth, collate.IgnoreWidth},
		{opts.Numeric, collate.Numeric},
	}
	var selected []collate.Option
	for _, sw := range switches {
		if sw.enabled {
			selected = append(selected, sw.option)
		}
	}
	return collate.New(opts.Language, selected...).CompareString(a, b)
}
// Usage
// Fields left out of the literal (IgnoreWidth, Numeric) keep their zero
// value, false, so only case and diacritics are ignored here.
result := compare("Café", "cafe", CompareOptions{
	IgnoreCase:       true,
	IgnoreDiacritics: true,
	Language:         language.French,
}) // 0 (equal)

Based on: