This document covers Unicode-related packages including bidirectional text support, normalization, CLDR data access, range tables, and rune name lookups.
Import path: golang.org/x/text/unicode/norm
Provides Unicode normalization forms NFC, NFD, NFKC, and NFKD.
// Form denotes a canonical representation of Unicode code points
type Form int
const (
NFC Form = iota // Unicode Normalization Form C (Canonical Composition)
NFD // Unicode Normalization Form D (Canonical Decomposition)
NFKC // Unicode Normalization Form KC (Compatibility Composition)
NFKD // Unicode Normalization Form KD (Compatibility Decomposition)
)
// Transformation methods
func (f Form) Bytes(b []byte) []byte
func (f Form) String(s string) string
func (f Form) Append(out []byte, src ...byte) []byte
func (f Form) AppendString(out []byte, src string) []byte
// Testing methods
func (f Form) IsNormal(b []byte) bool
func (f Form) IsNormalString(s string) bool
// Boundary detection
func (f Form) FirstBoundary(b []byte) int
func (f Form) FirstBoundaryInString(s string) int
func (f Form) LastBoundary(b []byte) int
func (f Form) NextBoundary(b []byte, atEOF bool) int
func (f Form) NextBoundaryInString(s string, atEOF bool) int
// Quick check (partial normalization test)
func (f Form) QuickSpan(b []byte) int
func (f Form) QuickSpanString(s string) int
// Properties
func (f Form) Properties(s []byte) Properties
func (f Form) PropertiesString(s string) Properties
// I/O wrappers
func (f Form) Reader(r io.Reader) io.Reader
func (f Form) Writer(w io.Writer) io.WriteCloser
// Transform interface implementation
func (f Form) Reset()
func (f Form) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (f Form) Span(b []byte, atEOF bool) (n int, err error)
func (f Form) SpanString(s string, atEOF bool) (n int, err error)
// Properties provides access to normalization properties of a rune
type Properties struct{}
func (p Properties) BoundaryAfter() bool
func (p Properties) BoundaryBefore() bool
func (p Properties) CCC() uint8
func (p Properties) LeadCCC() uint8
func (p Properties) TrailCCC() uint8
func (p Properties) Decomposition() []byte
func (p Properties) Size() int
// Iter iterates over a string or byte slice, normalizing it
type Iter struct{}
func (i *Iter) Init(f Form, src []byte)
func (i *Iter) InitString(f Form, src string)
func (i *Iter) Next() []byte
func (i *Iter) Pos() int
func (i *Iter) Done() bool
func (i *Iter) Seek(offset int64, whence int) (int64, error)
const Version string = "15.0.0"
const MaxTransformChunkSize int = 35 + maxNonStarters*4
const GraphemeJoiner string = "\u034F"
const MaxSegmentSize int
import "golang.org/x/text/unicode/norm"
// Normalize strings
nfc := norm.NFC.String("Café") // Composed form
nfd := norm.NFD.String("Café") // Decomposed form
// Test if normalized
isNormal := norm.NFC.IsNormalString("Café")
// Normalize bytes
normalized := norm.NFC.Bytes([]byte("Café"))
// Append normalized form
result := norm.NFC.AppendString(buf, "Café")
// Find normalization boundaries
boundary := norm.NFC.FirstBoundaryInString(text)
// Check if prefix is normalized
n := norm.NFC.QuickSpanString(text) // text[:n] == NFC(text[:n])
// Iterate over normalized segments
var iter norm.Iter
iter.InitString(norm.NFC, "Café")
for !iter.Done() {
segment := iter.Next()
// Process segment
}
// Streaming normalization
normalizedReader := norm.NFC.Reader(inputReader)
normalizedWriter := norm.NFC.Writer(outputWriter)
// Get properties
props := norm.NFC.PropertiesString("é")
ccc := props.CCC() // Canonical combining class
decomp := props.Decomposition() // Decomposition mapping
Import path: golang.org/x/text/unicode/bidi
Implements Unicode Bidirectional Algorithm for proper display of text containing both left-to-right and right-to-left scripts.
// Direction indicates the overall flow of text
type Direction int
const (
LeftToRight Direction = iota // No right-to-left characters
RightToLeft // No left-to-right characters
Mixed // Both LTR and RTL characters
Neutral // No LTR or RTL characters
)
// Class is the Unicode BiDi class
type Class uint
const (
L Class = iota // LeftToRight
R // RightToLeft
EN // EuropeanNumber
ES // EuropeanSeparator
ET // EuropeanTerminator
AN // ArabicNumber
CS // CommonSeparator
B // ParagraphSeparator
S // SegmentSeparator
WS // WhiteSpace
ON // OtherNeutral
BN // BoundaryNeutral
NSM // NonspacingMark
AL // ArabicLetter
Control // Control LRO - PDI
LRO // LeftToRightOverride
RLO // RightToLeftOverride
LRE // LeftToRightEmbedding
RLE // RightToLeftEmbedding
PDF // PopDirectionalFormat
LRI // LeftToRightIsolate
RLI // RightToLeftIsolate
FSI // FirstStrongIsolate
PDI // PopDirectionalIsolate
)
// Paragraph holds a single paragraph for Bidi processing
type Paragraph struct{}
func (p *Paragraph) SetBytes(b []byte, opts ...Option) (n int, err error)
func (p *Paragraph) SetString(s string, opts ...Option) (n int, err error)
func (p *Paragraph) IsLeftToRight() bool
func (p *Paragraph) Direction() Direction
func (p *Paragraph) RunAt(pos int) Run
func (p *Paragraph) Order() (Ordering, error)
func (p *Paragraph) Line(start, end int) (Ordering, error)
// Ordering holds the computed visual order of runs
type Ordering struct{}
func (o *Ordering) Direction() Direction
func (o *Ordering) NumRuns() int
func (o *Ordering) Run(i int) Run
// Run is a continuous sequence of characters of a single direction
type Run struct{}
func (r Run) String() string
func (r Run) Bytes() []byte
func (r Run) Direction() Direction
func (r Run) Pos() (start, end int)
// Properties provides access to BiDi properties of runes
type Properties struct{}
func Lookup(s []byte) (p Properties, sz int)
func LookupString(s string) (p Properties, sz int)
func LookupRune(r rune) (p Properties, size int)
func (p Properties) Class() Class
func (p Properties) IsBracket() bool
func (p Properties) IsOpeningBracket() bool
// Option is an option for Bidi processing
type Option func(*options)
func DefaultDirection(d Direction) Option
func ReverseString(s string) string
func AppendReverse(out, in []byte) []byte
const UnicodeVersion string = "15.0.0"
import "golang.org/x/text/unicode/bidi"
// Process a paragraph
var p bidi.Paragraph
p.SetString("Hello עברית World", bidi.DefaultDirection(bidi.LeftToRight))
// Check overall direction
if p.IsLeftToRight() {
// Entire paragraph is LTR
}
dir := p.Direction() // Mixed, LeftToRight, RightToLeft, or Neutral
// Get visual ordering for display
order, err := p.Order()
if err != nil {
// Handle error
}
// Iterate over runs in visual order
for i := 0; i < order.NumRuns(); i++ {
run := order.Run(i)
text := run.String()
direction := run.Direction()
start, end := run.Pos()
// Display run in appropriate direction
}
// Process a single line within a paragraph
lineOrdering, err := p.Line(0, 50)
// Get run at specific position
run := p.RunAt(10)
// Look up BiDi properties
props, size := bidi.LookupString("A")
class := props.Class() // L (LeftToRight)
props, size = bidi.LookupString("א")
class = props.Class() // R (RightToLeft)
// Reverse string for RTL display
reversed := bidi.ReverseString("Hello")
// Reverse bytes
out := bidi.AppendReverse(nil, []byte("Hello"))
Import path: golang.org/x/text/unicode/cldr
Provides parser for LDML and related XML formats from the Unicode Common Locale Data Repository.
// CLDR provides access to parsed CLDR data
type CLDR struct{}
func (cldr *CLDR) Locales() []string
func (cldr *CLDR) LDML(loc string) (*LDML, error)
func (cldr *CLDR) RawLDML(loc string) *LDML
func (cldr *CLDR) Supplemental() *SupplementalData
func (cldr *CLDR) BCP47() *LDMLBCP47
func (cldr *CLDR) SetDraftLevel(lev Draft, preferDraft bool)
// Decoder loads CLDR data archives
type Decoder struct{}
func (d *Decoder) SetDirFilter(dir ...string)
func (d *Decoder) SetSectionFilter(filter ...string)
func (d *Decoder) DecodePath(path string) (cldr *CLDR, err error)
func (d *Decoder) DecodeZip(r io.Reader) (cldr *CLDR, err error)
func (d *Decoder) Decode(l Loader) (cldr *CLDR, err error)
// Draft indicates the draft level of an element
type Draft int
const (
Approved Draft = iota
Contributed
Provisional
Unconfirmed
)
func ParseDraft(level string) (Draft, error)
func (d Draft) String() string
func Key(e Elem, exclude ...string) string
const Version string = "32"
import "golang.org/x/text/unicode/cldr"
// Decode CLDR data from path
var d cldr.Decoder
cldrData, err := d.DecodePath("/path/to/cldr")
// Get list of available locales
locales := cldrData.Locales()
// Get LDML data for specific locale
ldml := cldrData.RawLDML("en")
// Get supplemental data
supp := cldrData.Supplemental()
// Get BCP47 data
bcp47 := cldrData.BCP47()
// Set draft level filtering
cldrData.SetDraftLevel(cldr.Contributed, false)
// Filter directories when loading
d.SetDirFilter("main", "supplemental")
// Decode from zip
var zipReader io.Reader // ... open zip file
cldrData, err = d.DecodeZip(zipReader)
Import path: golang.org/x/text/unicode/rangetable
Provides utilities for creating and inspecting unicode.RangeTables.
// New creates a RangeTable from the given runes
func New(r ...rune) *unicode.RangeTable
// Merge returns a new RangeTable that is the union of the given tables
func Merge(ranges ...*unicode.RangeTable) *unicode.RangeTable
// Assigned returns a RangeTable with all assigned code points for a Unicode version
func Assigned(version string) *unicode.RangeTable
// Visit visits all runes in the given RangeTable in order
func Visit(rt *unicode.RangeTable, fn func(rune))
import (
"unicode"
"golang.org/x/text/unicode/rangetable"
)
// Create RangeTable from specific runes
rt := rangetable.New('a', 'b', 'c', 'x', 'y', 'z')
// Merge multiple range tables
combined := rangetable.Merge(
unicode.Latin,
unicode.Greek,
unicode.Cyrillic,
)
// Get assigned characters for Unicode version
assigned := rangetable.Assigned("13.0.0")
// Visit all runes in a table
rangetable.Visit(unicode.Letter, func(r rune) {
fmt.Printf("%c ", r)
})
// Use with unicode.Is
if unicode.Is(rt, 'a') {
// 'a' is in the range table
}
Import path: golang.org/x/text/unicode/runenames
Provides Unicode character names from the Unicode Character Database.
// Name returns the name for r
func Name(r rune) string
const UnicodeVersion string = "15.0.0"
import "golang.org/x/text/unicode/runenames"
// Get character names
name := runenames.Name('A') // "LATIN CAPITAL LETTER A"
name = runenames.Name('€') // "EURO SIGN"
name = runenames.Name('😀') // "GRINNING FACE"
name = runenames.Name('\u0301') // "COMBINING ACUTE ACCENT"
// Unknown characters return empty string
name = runenames.Name('\uFFFE') // ""
import (
"golang.org/x/text/unicode/norm"
"golang.org/x/text/transform"
)
// normalizeText returns input converted to Unicode Normalization Form C.
// The error result is always nil in this version.
func normalizeText(input string) (string, error) {
	nfc := norm.NFC.String(input)
	// Additional processing would go here.
	return nfc, nil
}
// Stream-based normalization
func normalizeStream(r io.Reader, w io.Writer) error {
// Create normalized reader
normalized := norm.NFC.Reader(r)
// Copy to output
_, err := io.Copy(w, normalized)
return err
}import "golang.org/x/text/unicode/norm"
// areEqual reports whether a and b are identical once both have been
// converted to NFC.
func areEqual(a, b string) bool {
	return norm.NFC.String(a) == norm.NFC.String(b)
}
// More efficient: check if already normalized
func areEqualEfficient(a, b string) bool {
// If both already in NFC, just compare
if norm.NFC.IsNormalString(a) && norm.NFC.IsNormalString(b) {
return a == b
}
// Otherwise normalize and compare
return norm.NFC.String(a) == norm.NFC.String(b)
}import (
"strings"
"golang.org/x/text/unicode/bidi"
)
// DisplayRun describes one run of a laid-out paragraph, as filled in by
// layoutParagraph.
type DisplayRun struct {
// Text is the run's content, taken from Run.String.
Text string
// Direction is the run's direction, taken from Run.Direction.
Direction bidi.Direction
// StartPos is the first value returned by Run.Pos.
StartPos int
// EndPos is the second value returned by Run.Pos.
EndPos int
}
func layoutParagraph(text string) []DisplayRun {
var p bidi.Paragraph
p.SetString(text)
order, err := p.Order()
if err != nil {
return nil
}
runs := make([]DisplayRun, order.NumRuns())
for i := 0; i < order.NumRuns(); i++ {
run := order.Run(i)
start, end := run.Pos()
runs[i] = DisplayRun{
Text: run.String(),
Direction: run.Direction(),
StartPos: start,
EndPos: end,
}
}
return runs
}import (
"fmt"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/unicode/runenames"
"golang.org/x/text/unicode/bidi"
)
func inspectCharacter(r rune) {
// Get character name
name := runenames.Name(r)
fmt.Printf("Name: %s\n", name)
// Get normalization properties
props := norm.NFC.PropertiesString(string(r))
fmt.Printf("CCC: %d\n", props.CCC())
if decomp := props.Decomposition(); len(decomp) > 0 {
fmt.Printf("Decomposes to: %q\n", decomp)
}
// Get BiDi properties
bidiProps, _ := bidi.LookupRune(r)
fmt.Printf("BiDi class: %v\n", bidiProps.Class())
// Check if combining character
if props.CCC() > 0 {
fmt.Println("This is a combining character")
}
}import "golang.org/x/text/unicode/norm"
// NormalizedString keeps a string together with its NFC form so that
// values can be compared by normalized content while still printing the
// text they were created from.
type NormalizedString struct {
	original   string // text as supplied to NewNormalizedString
	normalized string // NFC form of original
}

// NewNormalizedString builds a NormalizedString from s, computing the NFC
// form once up front.
func NewNormalizedString(s string) NormalizedString {
	var ns NormalizedString
	ns.original = s
	ns.normalized = norm.NFC.String(s)
	return ns
}

// String returns the original, un-normalized text.
func (ns NormalizedString) String() string { return ns.original }

// Equals reports whether ns and other have the same NFC form.
func (ns NormalizedString) Equals(other NormalizedString) bool {
	same := ns.normalized == other.normalized
	return same
}
// NormalizedMap is a map that uses normalized strings as its keys.
type NormalizedMap struct {
	data map[string]interface{}
}

// NewNormalizedMap returns an empty NormalizedMap whose backing map is
// already allocated.
func NewNormalizedMap() *NormalizedMap {
	m := new(NormalizedMap)
	m.data = map[string]interface{}{}
	return m
}
// Set stores value under the NFC form of key, replacing any entry already
// stored under that form.
func (m *NormalizedMap) Set(key string, value interface{}) {
	m.data[norm.NFC.String(key)] = value
}
func (m *NormalizedMap) Get(key string) (interface{}, bool) {
normalized := norm.NFC.String(key)
val, ok := m.data[normalized]
return val, ok
}import "golang.org/x/text/unicode/bidi"
// hasMixedDirectionality reports whether the paragraph direction computed
// for text is bidi.Mixed. It reports false when the paragraph cannot be
// set up.
func hasMixedDirectionality(text string) bool {
	var p bidi.Paragraph
	// Do not query the direction of a paragraph that failed to initialise.
	if _, err := p.SetString(text); err != nil {
		return false
	}
	return p.Direction() == bidi.Mixed
}
// Split text into directional segments
func splitByDirection(text string) []string {
var p bidi.Paragraph
p.SetString(text)
order, err := p.Order()
if err != nil {
return []string{text}
}
segments := make([]string, order.NumRuns())
for i := 0; i < order.NumRuns(); i++ {
run := order.Run(i)
segments[i] = run.String()
}
return segments
}Based on: