or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

collation.md encoding.md formatting.md index.md language.md localization.md search-and-security.md text-transformation.md unicode.md
tile.json

unicode.mddocs/

Unicode Operations

This document covers Unicode-related packages including bidirectional text support, normalization, CLDR data access, range tables, and rune name lookups.

Package Overview

  • unicode/bidi: Bidirectional text support
  • unicode/norm: Unicode normalization (NFC, NFD, NFKC, NFKD)
  • unicode/cldr: CLDR (Common Locale Data Repository) XML parser
  • unicode/rangetable: Utilities for creating and inspecting unicode.RangeTables
  • unicode/runenames: Unicode character names

Unicode Normalization Package

Import path: golang.org/x/text/unicode/norm

Provides Unicode normalization forms NFC, NFD, NFKC, and NFKD.

Form Type

// Form denotes a canonical representation of Unicode code points
type Form int

const (
    NFC  Form = iota // Unicode Normalization Form C (Canonical Composition)
    NFD              // Unicode Normalization Form D (Canonical Decomposition)
    NFKC             // Unicode Normalization Form KC (Compatibility Composition)
    NFKD             // Unicode Normalization Form KD (Compatibility Decomposition)
)

Form Methods

// Transformation methods
func (f Form) Bytes(b []byte) []byte
func (f Form) String(s string) string
func (f Form) Append(out []byte, src ...byte) []byte
func (f Form) AppendString(out []byte, src string) []byte

// Testing methods
func (f Form) IsNormal(b []byte) bool
func (f Form) IsNormalString(s string) bool

// Boundary detection
func (f Form) FirstBoundary(b []byte) int
func (f Form) FirstBoundaryInString(s string) int
func (f Form) LastBoundary(b []byte) int
func (f Form) NextBoundary(b []byte, atEOF bool) int
func (f Form) NextBoundaryInString(s string, atEOF bool) int

// Quick check (partial normalization test)
func (f Form) QuickSpan(b []byte) int
func (f Form) QuickSpanString(s string) int

// Properties
func (f Form) Properties(s []byte) Properties
func (f Form) PropertiesString(s string) Properties

// I/O wrappers
func (f Form) Reader(r io.Reader) io.Reader
func (f Form) Writer(w io.Writer) io.WriteCloser

// Transform interface implementation
func (f Form) Reset()
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (f Form) Span(b []byte, atEOF bool) (n int, err error)
func (f Form) SpanString(s string, atEOF bool) (n int, err error)

Properties Type

// Properties provides access to normalization properties of a rune
type Properties struct{}

func (p Properties) BoundaryAfter() bool
func (p Properties) BoundaryBefore() bool
func (p Properties) CCC() uint8
func (p Properties) LeadCCC() uint8
func (p Properties) TrailCCC() uint8
func (p Properties) Decomposition() []byte
func (p Properties) Size() int

Iter Type

// Iter iterates over a string or byte slice, normalizing it
type Iter struct{}

func (i *Iter) Init(f Form, src []byte)
func (i *Iter) InitString(f Form, src string)
func (i *Iter) Next() []byte
func (i *Iter) Pos() int
func (i *Iter) Done() bool
func (i *Iter) Seek(offset int64, whence int) (int64, error)

Constants

const Version string = "15.0.0"
const MaxTransformChunkSize int = 35 + maxNonStarters*4
const GraphemeJoiner string = "\u034F"
const MaxSegmentSize int

Usage Examples

import "golang.org/x/text/unicode/norm"

// Normalize strings
nfc := norm.NFC.String("Café") // Composed form
nfd := norm.NFD.String("Café") // Decomposed form

// Test if normalized
isNormal := norm.NFC.IsNormalString("Café")

// Normalize bytes
normalized := norm.NFC.Bytes([]byte("Café"))

// Append normalized form
result := norm.NFC.AppendString(buf, "Café")

// Find normalization boundaries
boundary := norm.NFC.FirstBoundaryInString(text)

// Check if prefix is normalized
n := norm.NFC.QuickSpanString(text) // text[:n] == NFC(text[:n])

// Iterate over normalized segments
var iter norm.Iter
iter.InitString(norm.NFC, "Café")
for !iter.Done() {
    segment := iter.Next()
    // Process segment
}

// Streaming normalization
normalizedReader := norm.NFC.Reader(inputReader)
normalizedWriter := norm.NFC.Writer(outputWriter) // call Close when done to flush any buffered data

// Get properties
props := norm.NFC.PropertiesString("é")
ccc := props.CCC()           // Canonical combining class
decomp := props.Decomposition() // Decomposition mapping

Bidirectional Text Package

Import path: golang.org/x/text/unicode/bidi

Implements the Unicode Bidirectional Algorithm for proper display of text containing both left-to-right and right-to-left scripts.

Direction Type

// Direction indicates the overall flow of text
type Direction int

const (
    LeftToRight Direction = iota // No right-to-left characters
    RightToLeft                  // No left-to-right characters
    Mixed                        // Both LTR and RTL characters
    Neutral                      // No LTR or RTL characters
)

Class Type

// Class is the Unicode BiDi class
type Class uint

const (
    L   Class = iota // LeftToRight
    R                // RightToLeft
    EN               // EuropeanNumber
    ES               // EuropeanSeparator
    ET               // EuropeanTerminator
    AN               // ArabicNumber
    CS               // CommonSeparator
    B                // ParagraphSeparator
    S                // SegmentSeparator
    WS               // WhiteSpace
    ON               // OtherNeutral
    BN               // BoundaryNeutral
    NSM              // NonspacingMark
    AL               // ArabicLetter
    Control          // Control LRO - PDI
    LRO              // LeftToRightOverride
    RLO              // RightToLeftOverride
    LRE              // LeftToRightEmbedding
    RLE              // RightToLeftEmbedding
    PDF              // PopDirectionalFormat
    LRI              // LeftToRightIsolate
    RLI              // RightToLeftIsolate
    FSI              // FirstStrongIsolate
    PDI              // PopDirectionalIsolate
)

Paragraph Type

// Paragraph holds a single paragraph for Bidi processing
type Paragraph struct{}

func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error)
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error)
func (p *Paragraph) IsLeftToRight() bool
func (p *Paragraph) Direction() Direction
func (p *Paragraph) RunAt(pos int) Run
func (p *Paragraph) Order() (Ordering, error)
func (p *Paragraph) Line(start, end int) (Ordering, error)

Ordering Type

// Ordering holds the computed visual order of runs
type Ordering struct{}

func (o *Ordering) Direction() Direction
func (o *Ordering) NumRuns() int
func (o *Ordering) Run(i int) Run

Run Type

// Run is a continuous sequence of characters of a single direction
type Run struct{}

func (r *Run) String() string
func (r *Run) Bytes() []byte
func (r *Run) Direction() Direction
func (r *Run) Pos() (start, end int)

Properties Type

// Properties provides access to BiDi properties of runes
type Properties struct{}

func Lookup(s []byte) (p Properties, sz int)
func LookupString(s string) (p Properties, sz int)
func LookupRune(r rune) (p Properties, size int)

func (p Properties) Class() Class
func (p Properties) IsBracket() bool
func (p Properties) IsOpeningBracket() bool

Option Type

// Option is an option for Bidi processing
type Option func(*options)

func DefaultDirection(d Direction) Option

Functions

func ReverseString(s string) string
func AppendReverse(out, in []byte) []byte

Constants

const UnicodeVersion string = "15.0.0"

Usage Examples

import "golang.org/x/text/unicode/bidi"

// Process a paragraph
var p bidi.Paragraph
p.SetString("Hello עברית World", bidi.DefaultDirection(bidi.LeftToRight))

// Check overall direction
if p.IsLeftToRight() {
    // The paragraph's principal (base) direction is LTR; it may still contain RTL runs
}
dir := p.Direction() // Mixed, LeftToRight, RightToLeft, or Neutral

// Get visual ordering for display
order, err := p.Order()
if err != nil {
    // Handle error
}

// Iterate over runs in visual order
for i := 0; i < order.NumRuns(); i++ {
    run := order.Run(i)
    text := run.String()
    direction := run.Direction()
    start, end := run.Pos()
    // Display run in appropriate direction
}

// Process a single line within a paragraph
lineOrdering, err := p.Line(0, 50)

// Get run at specific position
run := p.RunAt(10)

// Look up BiDi properties
props, size := bidi.LookupString("A")
class := props.Class() // L (LeftToRight)

props, size = bidi.LookupString("א")
class = props.Class() // R (RightToLeft)

// Reverse string for RTL display
reversed := bidi.ReverseString("Hello")

// Reverse bytes
out := bidi.AppendReverse(nil, []byte("Hello"))

CLDR Package

Import path: golang.org/x/text/unicode/cldr

Provides a parser for LDML and related XML formats from the Unicode Common Locale Data Repository.

CLDR Type

// CLDR provides access to parsed CLDR data
type CLDR struct{}

func (cldr *CLDR) Locales() []string
func (cldr *CLDR) LDML(loc string) (*LDML, error)
func (cldr *CLDR) RawLDML(loc string) *LDML
func (cldr *CLDR) Supplemental() *SupplementalData
func (cldr *CLDR) BCP47() *LDMLBCP47
func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool)

Decoder Type

// Decoder loads CLDR data archives
type Decoder struct{}

func (d *Decoder) SetDirFilter(dir ...string)
func (d *Decoder) SetSectionFilter(filter ...string)
func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error)
func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error)
func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error)

Draft Type

// Draft indicates the draft level of an element
type Draft int

const (
    Approved Draft = iota
    Contributed
    Provisional
    Unconfirmed
)

func ParseDraft(level string) (Draft, error)
func (d Draft) String() string

Functions

func Key(e Elem, exclude ...string) string

Constants

const Version string = "32"

Usage Examples

import "golang.org/x/text/unicode/cldr"

// Decode CLDR data from path
var d cldr.Decoder
cldrData, err := d.DecodePath("/path/to/cldr")

// Get list of available locales
locales := cldrData.Locales()

// Get LDML data for specific locale
ldml := cldrData.RawLDML("en")

// Get supplemental data
supp := cldrData.Supplemental()

// Get BCP47 data
bcp47 := cldrData.BCP47()

// Set draft level filtering
cldrData.SetDraftLevel(cldr.Contributed, false)

// Filter directories when loading
d.SetDirFilter("main", "supplemental")

// Decode from zip
var zipReader io.Reader // ... open zip file
cldrData, err = d.DecodeZip(zipReader)

Range Table Package

Import path: golang.org/x/text/unicode/rangetable

Provides utilities for creating and inspecting unicode.RangeTables.

Functions

// New creates a RangeTable from the given runes
func New(r ...rune) *unicode.RangeTable

// Merge returns a new RangeTable that is the union of the given tables
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable

// Assigned returns a RangeTable with all assigned code points for a Unicode version,
// or nil if the data for that version is not available
func Assigned(version string) *unicode.RangeTable

// Visit visits all runes in the given RangeTable in order
func Visit(rt *unicode.RangeTable, fn func(rune))

Usage Examples

import (
    "fmt"
    "unicode"

    "golang.org/x/text/unicode/rangetable"
)

// Create RangeTable from specific runes
rt := rangetable.New('a', 'b', 'c', 'x', 'y', 'z')

// Merge multiple range tables
combined := rangetable.Merge(
    unicode.Latin,
    unicode.Greek,
    unicode.Cyrillic,
)

// Get assigned characters for Unicode version
assigned := rangetable.Assigned("13.0.0")

// Visit all runes in a table
rangetable.Visit(unicode.Letter, func(r rune) {
    fmt.Printf("%c ", r)
})

// Use with unicode.Is
if unicode.Is(rt, 'a') {
    // 'a' is in the range table
}

Rune Names Package

Import path: golang.org/x/text/unicode/runenames

Provides Unicode character names from the Unicode Character Database.

Functions

// Name returns the name for r
func Name(r rune) string

Constants

const UnicodeVersion string = "15.0.0"

Usage Examples

import "golang.org/x/text/unicode/runenames"

// Get character names
name := runenames.Name('A')     // "LATIN CAPITAL LETTER A"
name = runenames.Name('€')      // "EURO SIGN"
name = runenames.Name('😀')     // "GRINNING FACE"
name = runenames.Name('\u0301') // "COMBINING ACUTE ACCENT"

// Unknown characters return empty string
name = runenames.Name('\uFFFE') // ""

Common Patterns

Text Normalization Pipeline

import (
    "io"

    "golang.org/x/text/unicode/norm"
    "golang.org/x/text/transform"
)

// normalizeText returns input converted to Unicode Normalization Form C.
// The error result is always nil here; presumably it is reserved for the
// additional processing steps — confirm before relying on it.
func normalizeText(input string) (string, error) {
    // Bring the text into NFC before anything else touches it.
    result := norm.NFC.String(input)

    // Additional processing...

    return result, nil
}

// normalizeStream copies everything read from r to w, routing it through an
// NFC-normalizing reader on the way. It returns the error reported by io.Copy.
func normalizeStream(r io.Reader, w io.Writer) error {
    _, copyErr := io.Copy(w, norm.NFC.Reader(r))
    return copyErr
}

Comparing Text for Equality

import "golang.org/x/text/unicode/norm"

// areEqual reports whether a and b are identical once both have been
// converted to NFC.
func areEqual(a, b string) bool {
    return norm.NFC.String(a) == norm.NFC.String(b)
}

// areEqualEfficient reports whether a and b are identical in NFC, skipping
// the normalization step when both inputs are already in NFC.
func areEqualEfficient(a, b string) bool {
    nfc := norm.NFC

    if !nfc.IsNormalString(a) || !nfc.IsNormalString(b) {
        // At least one side is not in NFC: normalize both, then compare.
        return nfc.String(a) == nfc.String(b)
    }

    // Both inputs are already NFC, so a direct comparison is enough.
    return a == b
}

Bidirectional Text Display

import (
    "strings"
    "golang.org/x/text/unicode/bidi"
)

// DisplayRun describes one run produced by layoutParagraph.
type DisplayRun struct {
    Text      string         // text of the run, taken from run.String()
    Direction bidi.Direction // direction of the run, taken from run.Direction()
    StartPos  int            // start position as reported by run.Pos()
    EndPos    int            // end position as reported by run.Pos()
}

// layoutParagraph runs the bidi algorithm over text and returns one
// DisplayRun per run, in the order reported by Paragraph.Order.
//
// It returns nil if text cannot be set on the paragraph or if the ordering
// cannot be computed.
func layoutParagraph(text string) []DisplayRun {
    var p bidi.Paragraph
    // SetString reports an error as its second result; do not compute an
    // ordering for a paragraph that failed to initialize.
    if _, err := p.SetString(text); err != nil {
        return nil
    }

    order, err := p.Order()
    if err != nil {
        return nil
    }

    runs := make([]DisplayRun, order.NumRuns())
    for i := range runs {
        run := order.Run(i)
        start, end := run.Pos()

        runs[i] = DisplayRun{
            Text:      run.String(),
            Direction: run.Direction(),
            StartPos:  start,
            EndPos:    end,
        }
    }

    return runs
}

Unicode Character Inspection

import (
    "fmt"
    "golang.org/x/text/unicode/norm"
    "golang.org/x/text/unicode/runenames"
    "golang.org/x/text/unicode/bidi"
)

// inspectCharacter prints, to standard output, the name of r, its canonical
// combining class, its decomposition (only when non-empty) and its BiDi
// class, followed by a note when the combining class is non-zero.
func inspectCharacter(r rune) {
    fmt.Printf("Name: %s\n", runenames.Name(r))

    // Normalization properties are looked up on the rune's string form.
    normProps := norm.NFC.PropertiesString(string(r))
    ccc := normProps.CCC()
    fmt.Printf("CCC: %d\n", ccc)

    decomp := normProps.Decomposition()
    if len(decomp) != 0 {
        fmt.Printf("Decomposes to: %q\n", decomp)
    }

    // Only the properties are needed; the size result is discarded.
    bidiProps, _ := bidi.LookupRune(r)
    fmt.Printf("BiDi class: %v\n", bidiProps.Class())

    // A non-zero combining class is reported as a combining character.
    if ccc != 0 {
        fmt.Println("This is a combining character")
    }
}

Safe Text Comparison with Normalization

import "golang.org/x/text/unicode/norm"

// NormalizedString pairs a string with its NFC form so values can be compared
// by normalized content while still printing the text as it was supplied.
type NormalizedString struct {
    original   string // text exactly as passed to NewNormalizedString
    normalized string // NFC form of original
}

// NewNormalizedString wraps s, computing its NFC form once up front.
func NewNormalizedString(s string) NormalizedString {
    var ns NormalizedString
    ns.original = s
    ns.normalized = norm.NFC.String(s)
    return ns
}

// String returns the original, un-normalized text.
func (ns NormalizedString) String() string { return ns.original }

// Equals reports whether ns and other have the same NFC form.
func (ns NormalizedString) Equals(other NormalizedString) bool {
    return other.normalized == ns.normalized
}

// NormalizedMap is a map whose string keys are converted to NFC before every
// store and lookup.
type NormalizedMap struct {
    data map[string]interface{}
}

// NewNormalizedMap returns an empty NormalizedMap that is ready for use.
func NewNormalizedMap() *NormalizedMap {
    m := new(NormalizedMap)
    m.data = map[string]interface{}{}
    return m
}

// Set stores value under the NFC form of key, replacing any entry already
// stored under that normalized key.
func (m *NormalizedMap) Set(key string, value interface{}) {
    m.data[norm.NFC.String(key)] = value
}

// Get returns the value stored under the NFC form of key, and whether such an
// entry exists.
func (m *NormalizedMap) Get(key string) (interface{}, bool) {
    v, found := m.data[norm.NFC.String(key)]
    return v, found
}

Handling Mixed Directionality

import "golang.org/x/text/unicode/bidi"

// hasMixedDirectionality reports whether the direction computed for text is
// bidi.Mixed. It returns false if text cannot be set on the paragraph.
func hasMixedDirectionality(text string) bool {
    var p bidi.Paragraph
    // SetString reports an error as its second result; a paragraph that
    // failed to initialize is not treated as mixed.
    if _, err := p.SetString(text); err != nil {
        return false
    }
    return p.Direction() == bidi.Mixed
}

// splitByDirection returns the text of each run of text, in the order
// reported by Paragraph.Order.
//
// If text cannot be set on the paragraph, or the ordering cannot be computed,
// it returns a single-element slice holding text unchanged.
func splitByDirection(text string) []string {
    var p bidi.Paragraph
    // Use the same fallback for a SetString failure as for an Order failure.
    if _, err := p.SetString(text); err != nil {
        return []string{text}
    }

    order, err := p.Order()
    if err != nil {
        return []string{text}
    }

    segments := make([]string, order.NumRuns())
    for i := range segments {
        run := order.Run(i)
        segments[i] = run.String()
    }

    return segments
}

Version Information

Based on:

  • Unicode 15.0.0
  • Unicode Bidirectional Algorithm (UAX #9)
  • Unicode Normalization Forms (UAX #15)
  • CLDR 32