or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

collation.md encoding.md formatting.md index.md language.md localization.md search-and-security.md text-transformation.md unicode.md
tile.json

docs/collation.md

Collation and Sorting

This document covers language-sensitive string comparison and sorting based on the Unicode Collation Algorithm (UCA).

Package Overview

  • collate: Language-sensitive string comparison and sorting
  • collate/build: Building custom collation tables

Collate Package

Import path: golang.org/x/text/collate

Provides types for comparing and sorting Unicode strings according to a given collation order, implementing the Unicode Collation Algorithm.

Collator Type

// Collator provides functionality for comparing strings for a given collation order
type Collator struct{}

// Constructors
func New(t language.Tag, o ...Option) *Collator
func NewFromTable(w colltab.Weighter, o ...Option) *Collator

Collator Methods

// Compare returns an integer comparing two byte slices
// Result: 0 if a==b, -1 if a < b, +1 if a > b
func (c *Collator) Compare(a, b []byte) int

// CompareString returns an integer comparing two strings
// Result: 0 if a==b, -1 if a < b, +1 if a > b
func (c *Collator) CompareString(a, b string) int

// Key returns the collation key for str. The returned slice points into buf
// and remains valid only until the next call to buf.Reset().
func (c *Collator) Key(buf *Buffer, str []byte) []byte

// KeyFromString returns the collation key for str. The returned slice points
// into buf and remains valid only until the next call to buf.Reset().
func (c *Collator) KeyFromString(buf *Buffer, str string) []byte

// Sort uses sort.Sort to sort the strings represented by x
func (c *Collator) Sort(x Lister)

// SortStrings uses sort.Sort to sort the strings in x
func (c *Collator) SortStrings(x []string)

Buffer Type

// Buffer holds keys generated by Key and KeyFromString
type Buffer struct{}

func (b *Buffer) Reset()

Lister Interface

// Lister can be sorted by Collator's Sort method
type Lister interface {
    Len() int
    Swap(i, j int)
    Bytes(i int) []byte
}

Option Type

// Option is used to change the behavior of a Collator
type Option func(*Collator)

Predefined Options

// IgnoreCase sets case-insensitive comparison
var IgnoreCase Option

// IgnoreDiacritics causes diacritical marks to be ignored ("o" == "ö")
var IgnoreDiacritics Option

// IgnoreWidth causes full-width characters to match their half-width equivalents
var IgnoreWidth Option

// Loose sets the collator to ignore diacritics, case and width
var Loose Option

// Force enables ordering if strings are equivalent but not equal
var Force Option

// Numeric specifies that numbers should sort numerically ("2" < "12")
var Numeric Option

Option Functions

// OptionsFromTag extracts BCP47 collation options from the tag
func OptionsFromTag(t language.Tag) Option

// Reorder overrides the pre-defined ordering of scripts and character sets
func Reorder(s ...string) Option

Functions

// Supported returns the list of languages for which collation differs from that of their parent
func Supported() []language.Tag

Constants

const CLDRVersion string = "23"
const UnicodeVersion string = "6.2.0"

Usage Examples

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// Create a collator for a specific language
col := collate.New(language.English)

// Compare strings
result := col.CompareString("apple", "banana") // -1 (apple < banana)
result = col.CompareString("apple", "apple")   // 0 (equal)
result = col.CompareString("banana", "apple")  // 1 (banana > apple)

// Sort strings
words := []string{"zebra", "apple", "mango", "banana"}
col.SortStrings(words)
// words is now: ["apple", "banana", "mango", "zebra"]

// Case-insensitive comparison
col = collate.New(language.English, collate.IgnoreCase)
result = col.CompareString("Apple", "apple") // 0 (equal)

// Ignore diacritics
col = collate.New(language.English, collate.IgnoreDiacritics)
result = col.CompareString("cafe", "café") // 0 (equal)

// Loose comparison (ignore case, diacritics, and width)
col = collate.New(language.English, collate.Loose)
result = col.CompareString("Café", "cafe") // 0 (equal)

// Numeric sorting
col = collate.New(language.English, collate.Numeric)
result = col.CompareString("file2.txt", "file10.txt") // -1 (2 < 10)

// Generate collation keys for efficient repeated comparisons
var buf collate.Buffer
key1 := col.KeyFromString(&buf, "apple")
buf.Reset()
key2 := col.KeyFromString(&buf, "banana")
// Compare keys with bytes.Compare
import "bytes"
result = bytes.Compare(key1, key2) // -1

// Language-specific sorting (German)
germanCol := collate.New(language.German)
germanWords := []string{"Öffnen", "Zebra", "Apfel"}
germanCol.SortStrings(germanWords)

// Extract options from language tag
tag := language.Make("de-u-co-phonebk") // German with phonebook ordering
col = collate.New(tag, collate.OptionsFromTag(tag))

// Multiple options
col = collate.New(
    language.English,
    collate.IgnoreCase,
    collate.Numeric,
)

// Custom script ordering
// NOTE(review): upstream golang.org/x/text appears to ship Reorder as an
// unimplemented stub that panics ("TODO: implement") — confirm against the
// module version in use before relying on this example.
col = collate.New(
    language.Und,
    collate.Reorder("latn", "cyrl", "grek"),
)

Collate Build Package

Import path: golang.org/x/text/collate/build

Provides functionality for building custom collation tables.

Builder Type

// Builder builds a root collation table
type Builder struct{}

func NewBuilder() *Builder

Builder Methods

// Add adds an entry to the collation element table
// Collation element format: []int{primary, secondary, tertiary, ...}
func (b *Builder) Add(runes []rune, colelems [][]int, variables []int) error

// Build builds the root Collator
func (b *Builder) Build() (colltab.Weighter, error)

// Print prints the tables as a Go file
func (b *Builder) Print(w io.Writer) (n int, err error)

// Tailoring returns a Tailoring for the given locale
func (b *Builder) Tailoring(loc language.Tag) *Tailoring

Tailoring Type

// Tailoring builds a collation table based on another collation table
type Tailoring struct{}

Tailoring Methods

// Build builds a Collator for this Tailoring
func (t *Tailoring) Build() (colltab.Weighter, error)

// SetAnchor sets the point after which subsequent Insert calls will insert
func (t *Tailoring) SetAnchor(anchor string) error

// SetAnchorBefore sets the point before which subsequent Insert calls will insert
func (t *Tailoring) SetAnchorBefore(anchor string) error

// Insert sets the ordering of str relative to the anchor
func (t *Tailoring) Insert(level colltab.Level, str, extend string) error

Usage Examples

import (
    "golang.org/x/text/collate/build"
    "golang.org/x/text/language"
)

// Create a custom collation table
builder := build.NewBuilder()

// Add collation elements
// Primary weights determine base character ordering
// Secondary weights determine diacritics
// Tertiary weights determine case
err := builder.Add(
    []rune{'a'},
    [][]int{{100, 5, 5}}, // primary=100, secondary=5, tertiary=5
    nil,
)

err = builder.Add(
    []rune{'b'},
    [][]int{{200, 5, 5}},
    nil,
)

// Build the collation table
weighter, err := builder.Build()

// Create a collator from the custom table
import "golang.org/x/text/collate"
col := collate.NewFromTable(weighter)

// Create a tailoring for a specific language
tailoring := builder.Tailoring(language.Spanish)

// Set anchor point for insertions
err = tailoring.SetAnchor("n")

// Insert new ordering rules
// ñ should come after n
// NOTE(review): colltab is not imported anywhere in this example, and in
// current golang.org/x/text releases it appears to live under
// internal/colltab, which code outside that module cannot import — confirm
// how callers are expected to obtain colltab.Primary.
err = tailoring.Insert(colltab.Primary, "ñ", "")

// Build the tailored collator
spanishWeighter, err := tailoring.Build()
spanishCol := collate.NewFromTable(spanishWeighter)

Common Patterns

Sorting Slices of Strings

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// sortStrings sorts strings in place using the collation order of lang and
// returns the same slice for convenience.
func sortStrings(strings []string, lang language.Tag) []string {
    collate.New(lang).SortStrings(strings)
    return strings
}

// Example usage
words := []string{"zebra", "apple", "mango"}
sortStrings(words, language.English)

Sorting Custom Types

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// Person is a simple record; the examples sort it by Name.
type Person struct {
    Name string
    Age  int
}

// PersonList wraps a slice of Person and provides the Len, Swap and Bytes
// methods required by collate.Lister so it can be passed to Collator.Sort.
// The collator field is stored but not read by any method shown here.
type PersonList struct {
    persons  []Person
    collator *collate.Collator
}

// Len reports how many persons are in the list.
func (p *PersonList) Len() int { return len(p.persons) }

// Swap exchanges the persons at positions i and j.
func (p *PersonList) Swap(i, j int) {
    ps := p.persons
    ps[i], ps[j] = ps[j], ps[i]
}

// Bytes returns the name of the i-th person as a byte slice.
func (p *PersonList) Bytes(i int) []byte {
    name := p.persons[i].Name
    return []byte(name)
}

// sortPersonsByName sorts persons in place by Name using the collation order
// of lang and returns the same slice.
func sortPersonsByName(persons []Person, lang language.Tag) []Person {
    col := collate.New(lang)
    col.Sort(&PersonList{persons: persons, collator: col})
    return persons
}

Case-Insensitive String Comparison

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// equalIgnoreCase reports whether a and b collate as equal when case is
// ignored, using the language-neutral (Und) collator. A new collator is
// constructed on every call.
func equalIgnoreCase(a, b string) bool {
    cmp := collate.New(language.Und, collate.IgnoreCase).CompareString(a, b)
    return cmp == 0
}

// lessIgnoreCase reports whether a collates before b when case is ignored,
// using the language-neutral (Und) collator. A new collator is constructed
// on every call.
func lessIgnoreCase(a, b string) bool {
    cmp := collate.New(language.Und, collate.IgnoreCase).CompareString(a, b)
    return cmp < 0
}

Efficient Repeated Comparisons with Keys

import (
    "bytes"
    "sort"

    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// SortableString holds a string and its collation key
type SortableString struct {
    Original string // the source string, unchanged
    Key      []byte // collation key generated for Original
}

// prepareSortableStrings pairs every input string with a private copy of its
// collation key under col. The result has the same length and order as
// strings.
func prepareSortableStrings(strings []string, col *collate.Collator) []SortableString {
    var scratch collate.Buffer
    out := make([]SortableString, len(strings))

    for idx := range strings {
        raw := col.KeyFromString(&scratch, strings[idx])

        // The key is copied out before the buffer is reset for the next string.
        owned := make([]byte, len(raw))
        copy(owned, raw)
        scratch.Reset()

        out[idx] = SortableString{Original: strings[idx], Key: owned}
    }

    return out
}

// sortByKeys orders sortable in place by ascending byte-wise comparison of
// the precomputed Key fields. sort.Slice does not guarantee a stable order
// for elements with equal keys.
func sortByKeys(sortable []SortableString) {
    byKey := func(i, j int) bool {
        return bytes.Compare(sortable[i].Key, sortable[j].Key) < 0
    }
    sort.Slice(sortable, byKey)
}

// efficientSort returns a new slice holding the elements of strings ordered
// by the collation rules of lang. Each collation key is generated once up
// front and the sort then compares raw key bytes.
func efficientSort(strings []string, lang language.Tag) []string {
    // Generate keys once, then sort by them.
    keyed := prepareSortableStrings(strings, collate.New(lang))
    sortByKeys(keyed)

    // Extract the original strings in sorted order.
    ordered := make([]string, len(keyed))
    for i := range keyed {
        ordered[i] = keyed[i].Original
    }
    return ordered
}

Natural/Numeric Sorting

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// sortNatural sorts strings in place with the language-neutral (Und) collator
// and the Numeric option, then returns the same slice.
func sortNatural(strings []string) []string {
    collate.New(language.Und, collate.Numeric).SortStrings(strings)
    return strings
}

// Example: sorts ["file1.txt", "file10.txt", "file2.txt"]
//      as: ["file1.txt", "file2.txt", "file10.txt"]

Language-Specific Search

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// contains reports whether any element of slice collates as equal to target
// under lang's collator with case and diacritics ignored.
func contains(slice []string, target string, lang language.Tag) bool {
    matcher := collate.New(lang, collate.IgnoreCase, collate.IgnoreDiacritics)

    for idx := range slice {
        if matcher.CompareString(slice[idx], target) != 0 {
            continue
        }
        return true
    }

    return false
}

// indexOf returns the index of the first element of slice that collates as
// equal to target under lang's collator with case ignored, or -1 if there is
// no such element.
// NOTE(review): unlike contains, diacritics are not ignored here — confirm
// the difference is intended.
func indexOf(slice []string, target string, lang language.Tag) int {
    matcher := collate.New(lang, collate.IgnoreCase)

    for idx := 0; idx < len(slice); idx++ {
        if matcher.CompareString(slice[idx], target) == 0 {
            return idx
        }
    }

    return -1
}

Grouping by First Letter

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
    "unicode/utf8"
)

// groupByFirstLetter sorts strings in place by lang's collation order and
// then buckets them by their first rune. Empty strings are skipped. Bucket
// keys are exact runes, so differently cased initials land in different
// buckets.
func groupByFirstLetter(strings []string, lang language.Tag) map[rune][]string {
    // Sort first so that every bucket is filled in collation order.
    collate.New(lang).SortStrings(strings)

    buckets := make(map[rune][]string)
    for idx := range strings {
        word := strings[idx]
        if word == "" {
            continue
        }

        // Only the rune is needed; its byte width is discarded.
        initial, _ := utf8.DecodeRuneInString(word)
        buckets[initial] = append(buckets[initial], word)
    }

    return buckets
}

Binary Search with Collation

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
    "sort"
)

// binarySearch looks up target in sorted, which must already be ordered by
// the collator for lang. It returns the index of the first element that
// collates as equal to target, or -1 if there is none.
func binarySearch(sorted []string, target string, lang language.Tag) int {
    col := collate.New(lang)

    // Find the smallest index whose element does not collate before target.
    notBefore := func(i int) bool {
        return col.CompareString(sorted[i], target) >= 0
    }
    pos := sort.Search(len(sorted), notBefore)

    if pos >= len(sorted) || col.CompareString(sorted[pos], target) != 0 {
        return -1 // Not found
    }
    return pos
}

Implementing a Sorted Map

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// SortedMap is a string-keyed map that also keeps its keys in a slice ordered
// by a collator.
type SortedMap struct {
    keys     []string
    values   map[string]interface{}
    collator *collate.Collator
}

// NewSortedMap returns an empty SortedMap whose keys are ordered by the
// collator for lang.
func NewSortedMap(lang language.Tag) *SortedMap {
    m := new(SortedMap)
    m.keys = []string{}
    m.values = make(map[string]interface{})
    m.collator = collate.New(lang)
    return m
}

// Set stores value under key. When key is new it is appended to the key
// slice and the whole slice is re-sorted; an existing key only has its value
// replaced.
func (m *SortedMap) Set(key string, value interface{}) {
    if _, known := m.values[key]; known {
        m.values[key] = value
        return
    }

    m.keys = append(m.keys, key)
    m.collator.SortStrings(m.keys)
    m.values[key] = value
}

// Get returns the value stored under key and whether the key was present.
func (m *SortedMap) Get(key string) (interface{}, bool) {
    stored, found := m.values[key]
    return stored, found
}

// Keys returns the keys in collation order. The returned slice is the map's
// internal slice, not a copy.
func (m *SortedMap) Keys() []string {
    return m.keys
}

Comparing with Multiple Options

import (
    "golang.org/x/text/collate"
    "golang.org/x/text/language"
)

// CompareOptions selects the collation language and the optional behaviors
// applied when comparing two strings.
type CompareOptions struct {
    IgnoreCase       bool // when true, collate.IgnoreCase is applied
    IgnoreDiacritics bool // when true, collate.IgnoreDiacritics is applied
    IgnoreWidth      bool // when true, collate.IgnoreWidth is applied
    Numeric          bool // when true, collate.Numeric is applied
    Language         language.Tag // language whose collator is used
}

// compare builds a collator for opts.Language with the options enabled in
// opts and returns the result of its CompareString(a, b).
func compare(a, b string, opts CompareOptions) int {
    // Each flag is paired with the option it enables, in application order.
    toggles := []struct {
        enabled bool
        option  collate.Option
    }{
        {opts.IgnoreCase, collate.IgnoreCase},
        {opts.IgnoreDiacritics, collate.IgnoreDiacritics},
        {opts.IgnoreWidth, collate.IgnoreWidth},
        {opts.Numeric, collate.Numeric},
    }

    var options []collate.Option
    for _, t := range toggles {
        if t.enabled {
            options = append(options, t.option)
        }
    }

    return collate.New(opts.Language, options...).CompareString(a, b)
}

// Usage
result := compare("Café", "cafe", CompareOptions{
    IgnoreCase:       true,
    IgnoreDiacritics: true,
    Language:         language.French,
}) // 0 (equal)

Version Information

Based on:

  • CLDR 23
  • Unicode 6.2.0
  • Unicode Collation Algorithm (UCA)
  • Unicode Technical Standard #10