or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

collation.md encoding.md formatting.md index.md language.md localization.md search-and-security.md text-transformation.md unicode.md
tile.json

docs/text-transformation.md

Text Transformation

This document covers text transformation packages including the general transformation framework, case mapping, rune operations, and width conversion.

Package Overview

  • transform: General framework for byte transformations
  • cases: Language-specific case mapping
  • runes: UTF-8 rune transformations
  • width: Character width conversions (half-width/full-width)

Transform Package

Import path: golang.org/x/text/transform

Provides reader and writer wrappers that transform bytes passing through, as well as various transformation utilities.

Transformer Interface

// Transformer transforms bytes
type Transformer interface {
    Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
    Reset()
}

// SpanningTransformer extends Transformer with a Span method
type SpanningTransformer interface {
    Transformer
    Span(src []byte, atEOF bool) (n int, err error)
}

Reader and Writer

// Reader wraps another io.Reader by transforming the bytes read
type Reader struct{}

func NewReader(r io.Reader, t Transformer) *Reader
func (r *Reader) Read(p []byte) (int, error)

// Writer wraps another io.Writer by transforming the bytes written
type Writer struct{}

func NewWriter(w io.Writer, t Transformer) *Writer
func (w *Writer) Write(data []byte) (n int, err error)
func (w *Writer) Close() error

Utility Functions

// String returns a string with the result of converting s[:n] using t
func String(t Transformer, s string) (result string, n int, err error)

// Bytes returns a new byte slice with the result of converting b[:n] using t
func Bytes(t Transformer, b []byte) (result []byte, n int, err error)

// Append appends the result of converting src[:n] using t to dst
func Append(t Transformer, dst, src []byte) (result []byte, n int, err error)

// Chain returns a Transformer that applies t in sequence
func Chain(t ...Transformer) Transformer

// RemoveFunc returns a Transformer that removes runes satisfying f
// Deprecated: Use runes.Remove instead
func RemoveFunc(f func(r rune) bool) Transformer

Predefined Transformers

// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing
var Discard Transformer

// Nop is a SpanningTransformer that copies src to dst
var Nop SpanningTransformer

NopResetter

// NopResetter can be embedded to add a nop Reset method
type NopResetter struct{}

func (NopResetter) Reset()

Errors

// ErrShortDst means the destination buffer was too short
var ErrShortDst error

// ErrShortSrc means the source buffer has insufficient data
var ErrShortSrc error

// ErrEndOfSpan means input and output are not identical
var ErrEndOfSpan error

Usage Examples

import (
    "io"
    "golang.org/x/text/transform"
    "golang.org/x/text/unicode/norm"
)

// Transform a string
result, n, err := transform.String(norm.NFC, "Café")

// Transform bytes
resultBytes, n, err := transform.Bytes(norm.NFC, []byte("Café"))

// Append transformed bytes
dst := make([]byte, 0, 100)
dst, n, err = transform.Append(norm.NFC, dst, []byte("Café"))

// Chain multiple transformers
chained := transform.Chain(norm.NFD, norm.NFC)
result, n, err = transform.String(chained, "text")

// Streaming transformation with Reader
reader := transform.NewReader(inputReader, norm.NFC)
io.Copy(output, reader)

// Streaming transformation with Writer
writer := transform.NewWriter(outputWriter, norm.NFC)
writer.Write([]byte("Café"))
writer.Close()

Cases Package

Import path: golang.org/x/text/cases

Provides general and language-specific case mapping (upper, lower, title, fold).

Caser Type

// Caser transforms text to a certain case
// Implements transform.Transformer
//
// NOTE(review): the upstream golang.org/x/text/cases package appears to
// declare Caser as a concrete struct type carrying these methods, not as an
// interface — confirm before relying on this listing.
type Caser interface {
    // Embedding transform.Transformer already contributes Transform and Reset;
    // they are repeated below so the full method set is visible in one place.
    transform.Transformer
    String(s string) string
    Bytes(b []byte) []byte
    Reset()
    Span(src []byte, atEOF bool) (n int, err error)
    Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
}

Caser Constructors

// Upper returns a Caser for language-specific uppercasing
func Upper(t language.Tag, opts ...Option) Caser

// Lower returns a Caser for language-specific lowercasing
func Lower(t language.Tag, opts ...Option) Caser

// Title returns a Caser for language-specific title casing
func Title(t language.Tag, opts ...Option) Caser

// Fold returns a Caser that implements Unicode case folding
// The returned Caser is stateless and safe for concurrent use
func Fold(opts ...Option) Caser

Option Type

// Option is used to modify the behavior of a Caser
type Option func(o options) options

// NoLower disables lowercasing of non-leading letters for title caser
var NoLower Option

// Compact omits mappings in case folding for characters that would grow
// (Currently unimplemented)
var Compact Option

// HandleFinalSigma specifies whether special handling of Greek final sigma should be enabled
func HandleFinalSigma(enable bool) Option

Variables

// Supported defines coverage of supported languages for case mapping
var Supported language.Coverage

Constants

const UnicodeVersion string = "15.0.0"

Usage Examples

import (
    "golang.org/x/text/cases"
    "golang.org/x/text/language"
)

// Language-specific uppercasing
upper := cases.Upper(language.English)
result := upper.String("hello world") // "HELLO WORLD"

// Turkish uppercasing (i → İ, not I)
turkishUpper := cases.Upper(language.Turkish)
result = turkishUpper.String("istanbul") // "İSTANBUL"

// Lowercasing
lower := cases.Lower(language.English)
result = lower.String("HELLO WORLD") // "hello world"

// Title casing (capitalize first letter of each word)
title := cases.Title(language.English)
result = title.String("hello world") // "Hello World"

// Title with NoLower option (don't lowercase other letters)
titleNoLower := cases.Title(language.English, cases.NoLower)
result = titleNoLower.String("hello WORLD") // "Hello WORLD"

// Case folding (for case-insensitive comparison)
fold := cases.Fold()
result = fold.String("Hello WORLD") // normalized case

// Transform bytes
upperBytes := upper.Bytes([]byte("hello"))

// Use with transform.Chain
import "golang.org/x/text/transform"

combined := transform.Chain(
    cases.Lower(language.English),
    // ... other transformers
)

// Greek final sigma handling
greekCaser := cases.Lower(language.Greek, cases.HandleFinalSigma(true))
result = greekCaser.String("ΜΆΣΣΟΣ") // "μάσσος" (with final σ)

Runes Package

Import path: golang.org/x/text/runes

Provides transforms for UTF-8 encoded text at the rune level.

Set Interface

// Set is a collection of runes
type Set interface {
    Contains(r rune) bool
}

Set Constructors

// In creates a Set with runes in the given RangeTable
func In(rt *unicode.RangeTable) Set

// NotIn creates a Set with runes not in the given RangeTable
func NotIn(rt *unicode.RangeTable) Set

// Predicate creates a Set with a Contains method that returns f(r)
func Predicate(f func(rune) bool) Set

Transformer Type

// Transformer implements transform.Transformer
type Transformer struct{}

func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error)
func (t Transformer) Reset()
func (t Transformer) String(s string) string
func (t Transformer) Bytes(b []byte) []byte

Transformer Constructors

// Map returns a Transformer that maps runes using the given mapping
func Map(mapping func(rune) rune) Transformer

// Remove returns a Transformer that removes runes r for which s.Contains(r)
func Remove(s Set) Transformer

// If returns a transformer that applies tIn to consecutive runes for which
// s.Contains(r) and tNotIn for consecutive runes for which !s.Contains(r)
func If(s Set, tIn, tNotIn transform.Transformer) Transformer

// ReplaceIllFormed returns a transformer that replaces all invalid UTF-8
// sequences with utf8.RuneError
func ReplaceIllFormed() Transformer

Usage Examples

import (
    "unicode"
    "golang.org/x/text/runes"
    "golang.org/x/text/transform"
)

// Remove all non-letter runes: runes.Remove deletes the runes contained in
// the set, so the set must be everything outside unicode.Letter (NotIn).
removeNonLetters := runes.Remove(runes.NotIn(unicode.Letter))
result := removeNonLetters.String("Hello, 123 World!") // "HelloWorld"

// Map runes with a function
toUpper := runes.Map(func(r rune) rune {
    return unicode.ToUpper(r)
})
result = toUpper.String("hello") // "HELLO"

// Remove specific runes
removePunctuation := runes.Remove(runes.In(unicode.Punct))
result = removePunctuation.String("Hello, World!") // "Hello World"

// Keep only specific runes
keepDigits := runes.Remove(runes.NotIn(unicode.Digit))
result = keepDigits.String("Price: $123.45") // "12345"

// Use predicate for complex conditions
removeVowels := runes.Remove(runes.Predicate(func(r rune) bool {
    return r == 'a' || r == 'e' || r == 'i' || r == 'o' || r == 'u'
}))
result = removeVowels.String("hello world") // "hll wrld"

// Conditional transformation
onlyUppercaseLetters := runes.If(
    runes.In(unicode.Letter),
    runes.Map(unicode.ToUpper),
    runes.Remove(runes.Predicate(func(r rune) bool { return true })),
)
result = onlyUppercaseLetters.String("Hello 123 World!") // "HELLOWORLD"

// Replace ill-formed UTF-8
fixUTF8 := runes.ReplaceIllFormed()
result = fixUTF8.String(invalidUTF8String)

// Chain with other transformers
import "golang.org/x/text/unicode/norm"

normalized := transform.Chain(
    norm.NFD,
    runes.Remove(runes.In(unicode.Mn)), // Remove combining marks
    norm.NFC,
)
result, _, _ = transform.String(normalized, "Café") // "Cafe"

Width Package

Import path: golang.org/x/text/width

Provides functionality for handling different character widths (half-width/full-width conversions).

Kind Type

// Kind indicates the type of width property
type Kind int

const (
    Neutral             Kind = iota // Do not occur in legacy East Asian character sets
    EastAsianAmbiguous               // Can be sometimes wide and sometimes narrow
    EastAsianWide                    // Wide in its usual form
    EastAsianNarrow                  // Narrow in its usual form
    EastAsianFullwidth               // Compatibility decomposition of type wide
    EastAsianHalfwidth               // Compatibility decomposition of type narrow
)

func (k Kind) String() string

Properties Type

// Properties provides access to width properties of a rune
type Properties struct{}

func Lookup(b []byte) (p Properties, size int)
func LookupString(s string) (p Properties, size int)
func LookupRune(r rune) Properties

func (p Properties) Kind() Kind
func (p Properties) Narrow() rune
func (p Properties) Wide() rune
func (p Properties) Folded() rune

Transformer Type

// Transformer implements transform.Transformer
type Transformer struct{}

func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error)
func (t Transformer) Reset()
func (t Transformer) String(s string) string
func (t Transformer) Bytes(b []byte) []byte

Predefined Transformers

// Fold maps all runes to their canonical width
var Fold Transformer

// Widen maps runes to their wide variant, if available
var Widen Transformer

// Narrow maps runes to their narrow variant, if available
var Narrow Transformer

Constants

const UnicodeVersion string = "15.0.0"

Usage Examples

import "golang.org/x/text/width"

// Convert to full-width
result := width.Widen.String("ABC123") // "ＡＢＣ１２３"

// Convert to half-width
result = width.Narrow.String("ＡＢＣ１２３") // "ABC123"

// Fold to canonical width (typically narrow)
result = width.Fold.String("ＡＢＣ１２３") // "ABC123"

// Transform bytes
widened := width.Widen.Bytes([]byte("Hello"))

// Look up width properties
props, size := width.LookupString("A")
kind := props.Kind() // EastAsianNarrow

// Wide, Narrow and Folded return 0 when the rune is already in the requested
// form or has no such variant, so check for 0 before using the result.
wide := props.Wide()     // 'Ａ' (full-width variant)
narrow := props.Narrow() // 0 ('A' is already narrow)
folded := props.Folded() // 0 ('A' is already in canonical form)

// Check specific characters
props = width.LookupRune('Ａ') // Full-width A
if props.Kind() == width.EastAsianFullwidth {
    narrow := props.Narrow() // 'A'
}

// Use with transform.Chain
import "golang.org/x/text/transform"

normalized := transform.Chain(
    width.Fold,
    // ... other transformers
)

Common Patterns

Text Normalization Pipeline

import (
    "golang.org/x/text/transform"
    "golang.org/x/text/unicode/norm"
    "golang.org/x/text/cases"
    "golang.org/x/text/runes"
    "golang.org/x/text/width"
    "unicode"
)

// normalizeForComparison reduces s to a canonical form so that strings which
// differ only in diacritics, character width, or letter case come out equal.
func normalizeForComparison(s string) string {
    steps := []transform.Transformer{
        norm.NFD,                           // decompose so accents become separate marks
        runes.Remove(runes.In(unicode.Mn)), // drop the nonspacing marks (diacritics)
        norm.NFC,                           // recompose what is left
        width.Fold,                         // fold to canonical character width
        cases.Fold(),                       // apply Unicode case folding
    }

    // The consumed-byte count and the error are intentionally discarded.
    folded, _, _ := transform.String(transform.Chain(steps...), s)
    return folded
}

Sanitizing User Input

import (
    "unicode"
    "golang.org/x/text/runes"
    "golang.org/x/text/transform"
)

// Remove control characters and keep only printable text
func sanitizeInput(s string) string {
    t := runes.Remove(runes.Predicate(func(r rune) bool {
        return unicode.IsControl(r) && !unicode.IsSpace(r)
    }))

    result, _, _ := transform.String(t, s)
    return result
}

// keepSafeCharacters drops every rune from s that is not a letter, a number,
// whitespace, or one of the punctuation marks '.', ',', '!' and '?'.
func keepSafeCharacters(s string) string {
    isSafe := func(r rune) bool {
        switch r {
        case '.', ',', '!', '?':
            return true
        }
        return unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsSpace(r)
    }

    // runes.Remove deletes what the set contains, so hand it the complement.
    rejected := runes.Predicate(func(r rune) bool { return !isSafe(r) })

    // The consumed-byte count and the error are intentionally discarded.
    kept, _, _ := transform.String(runes.Remove(rejected), s)
    return kept
}

Custom Rune Mapping

import "golang.org/x/text/runes"

// rot13 applies the ROT13 substitution to s: each ASCII letter is rotated 13
// places within its own case, and every other rune is passed through as-is.
func rot13(s string) string {
    rotate := func(r rune) rune {
        var base rune
        switch {
        case 'a' <= r && r <= 'z':
            base = 'a'
        case 'A' <= r && r <= 'Z':
            base = 'A'
        default:
            return r
        }
        return base + (r-base+13)%26
    }

    return runes.Map(rotate).String(s)
}

// obfuscateVowels returns s with every ASCII vowel (either case) replaced by
// an asterisk; all other runes are left untouched.
func obfuscateVowels(s string) string {
    mask := func(r rune) rune {
        switch r {
        case 'a', 'e', 'i', 'o', 'u', 'A', 'E', 'I', 'O', 'U':
            return '*'
        default:
            return r
        }
    }

    return runes.Map(mask).String(s)
}

Language-Specific Text Processing

import (
    "golang.org/x/text/cases"
    "golang.org/x/text/language"
)

// processText title-cases text using the case-mapping rules selected by lang.
func processText(text string, lang language.Tag) string {
    return cases.Title(lang).String(text)
}

// Example: Turkish vs English
text := "istanbul"
enTitle := cases.Title(language.English).String(text)  // "Istanbul"
trTitle := cases.Title(language.Turkish).String(text)  // "İstanbul"

Streaming Text Transformation

import (
    "io"
    "golang.org/x/text/transform"
    "golang.org/x/text/unicode/norm"
    "golang.org/x/text/width"
)

// processLargeFile copies input to output through a transforming reader that
// normalizes to NFC and then folds character widths. It returns whatever
// error io.Copy reports.
func processLargeFile(input io.Reader, output io.Writer) error {
    pipeline := transform.Chain(norm.NFC, width.Fold)

    // Bytes are transformed as io.Copy pulls them through the wrapped reader.
    if _, err := io.Copy(output, transform.NewReader(input, pipeline)); err != nil {
        return err
    }
    return nil
}

// writeTransformed normalizes data to NFC, folds character widths, and writes
// the result to w.
//
// Close flushes whatever the transforming writer is still holding, so its
// error is returned instead of being dropped by a bare defer; otherwise a
// failed flush would lose output silently.
func writeTransformed(w io.Writer, data []byte) error {
    t := transform.Chain(
        norm.NFC,
        width.Fold,
    )

    writer := transform.NewWriter(w, t)
    if _, err := writer.Write(data); err != nil {
        // The write error is the one worth reporting; closing here is only
        // best-effort cleanup, so its result is deliberately ignored.
        _ = writer.Close()
        return err
    }
    return writer.Close()
}

Building Custom Transformers

import "golang.org/x/text/transform"

// spaceToUnderscore is a transformer that rewrites every ASCII space byte as
// an underscore and copies all other bytes through unchanged. The embedded
// transform.NopResetter supplies its Reset method.
type spaceToUnderscore struct {
    transform.NopResetter
}

// Transform copies src into dst one byte at a time, substituting '_' for ' '.
// It returns transform.ErrShortDst, together with the progress made so far,
// as soon as dst has no room for the next byte. atEOF is not consulted.
func (spaceToUnderscore) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
    for _, b := range src {
        if nDst >= len(dst) {
            return nDst, nSrc, transform.ErrShortDst
        }

        if b == ' ' {
            b = '_'
        }
        dst[nDst] = b

        nDst++
        nSrc++
    }

    return nDst, nSrc, nil
}

// convertSpaces runs s through the spaceToUnderscore transformer and returns
// the result. The consumed-byte count and the error are discarded.
func convertSpaces(s string) string {
    var replacer spaceToUnderscore
    out, _, _ := transform.String(replacer, s)
    return out
}

Width-Aware Text Processing

import (
    "golang.org/x/text/width"
    "strings"
)

// normalizeWidth returns s with width.Fold applied to it.
func normalizeWidth(s string) string {
    folded := width.Fold.String(s)
    return folded
}

// hasFullWidth reports whether s contains at least one rune whose width kind
// is EastAsianFullwidth or EastAsianWide.
func hasFullWidth(s string) bool {
    for _, r := range s {
        switch width.LookupRune(r).Kind() {
        case width.EastAsianFullwidth, width.EastAsianWide:
            return true
        }
    }
    return false
}

// toFullWidth returns s with width.Widen applied to it.
func toFullWidth(s string) string {
    widened := width.Widen.String(s)
    return widened
}

// toHalfWidth returns s with width.Narrow applied to it.
func toHalfWidth(s string) string {
    narrowed := width.Narrow.String(s)
    return narrowed
}

Version Information

Based on:

  • Unicode 15.0.0
  • Unicode Standard Annex #11 (East Asian Width)
  • Unicode Default Case Conversion algorithm