This document covers text transformation packages including the general transformation framework, case mapping, rune operations, and width conversion.
Import path: golang.org/x/text/transform
Provides reader and writer wrappers that transform bytes passing through, as well as various transformation utilities.
// Transformer transforms bytes
type Transformer interface {
Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
Reset()
}
// SpanningTransformer extends Transformer with a Span method
type SpanningTransformer interface {
Transformer
Span(src []byte, atEOF bool) (n int, err error)
}
// Reader wraps another io.Reader by transforming the bytes read
type Reader struct{}
func NewReader(r io.Reader, t Transformer) *Reader
func (r *Reader) Read(p []byte) (int, error)
// Writer wraps another io.Writer by transforming the bytes written
type Writer struct{}
func NewWriter(w io.Writer, t Transformer) *Writer
func (w *Writer) Write(data []byte) (n int, err error)
func (w *Writer) Close() error
// String returns a string with the result of converting s[:n] using t
func String(t Transformer, s string) (result string, n int, err error)
// Bytes returns a new byte slice with the result of converting b[:n] using t
func Bytes(t Transformer, b []byte) (result []byte, n int, err error)
// Append appends the result of converting src[:n] using t to dst
func Append(t Transformer, dst, src []byte) (result []byte, n int, err error)
// Chain returns a Transformer that applies t in sequence
func Chain(t ...Transformer) Transformer
// RemoveFunc returns a Transformer that removes runes satisfying f
// Deprecated: Use runes.Remove instead
func RemoveFunc(f func(r rune) bool) Transformer
// Discard is a Transformer for which all Transform calls succeed
// by consuming all bytes and writing nothing
var Discard Transformer
// Nop is a SpanningTransformer that copies src to dst
var Nop SpanningTransformer
// NopResetter can be embedded to add a nop Reset method
type NopResetter struct{}
func (NopResetter) Reset()
// ErrShortDst means the destination buffer was too short
var ErrShortDst error
// ErrShortSrc means the source buffer has insufficient data
var ErrShortSrc error
// ErrEndOfSpan means input and output are not identical
var ErrEndOfSpan error
import (
"io"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
)
// Transform a string
result, n, err := transform.String(norm.NFC, "Café")
// Transform bytes
resultBytes, n, err := transform.Bytes(norm.NFC, []byte("Café"))
// Append transformed bytes
dst := make([]byte, 0, 100)
dst, n, err = transform.Append(norm.NFC, dst, []byte("Café"))
// Chain multiple transformers
chained := transform.Chain(norm.NFD, norm.NFC)
result, n, err = transform.String(chained, "text")
// Streaming transformation with Reader
reader := transform.NewReader(inputReader, norm.NFC)
io.Copy(output, reader)
// Streaming transformation with Writer
writer := transform.NewWriter(outputWriter, norm.NFC)
writer.Write([]byte("Café"))
writer.Close()
Import path: golang.org/x/text/cases
Provides general and language-specific case mapping (upper, lower, title, fold).
// Caser transforms text to a certain case.
// It is a concrete struct type (not an interface); its methods implement
// transform.SpanningTransformer, so a Caser can be passed anywhere a
// transform.Transformer is expected.
type Caser struct{}
func (c Caser) String(s string) string
func (c Caser) Bytes(b []byte) []byte
func (c Caser) Reset()
func (c Caser) Span(src []byte, atEOF bool) (n int, err error)
func (c Caser) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
// Upper returns a Caser for language-specific uppercasing
func Upper(t language.Tag, opts ...Option) Caser
// Lower returns a Caser for language-specific lowercasing
func Lower(t language.Tag, opts ...Option) Caser
// Title returns a Caser for language-specific title casing
func Title(t language.Tag, opts ...Option) Caser
// Fold returns a Caser that implements Unicode case folding
// The returned Caser is stateless and safe for concurrent use
func Fold(opts ...Option) Caser
// Option is used to modify the behavior of a Caser
type Option func(o options) options
// NoLower disables lowercasing of non-leading letters for title caser
var NoLower Option
// Compact omits mappings in case folding for characters that would grow
// (Currently unimplemented)
var Compact Option
// HandleFinalSigma specifies whether special handling of Greek final sigma should be enabled
func HandleFinalSigma(enable bool) Option
// Supported defines coverage of supported languages for case mapping
var Supported language.Coverage
const UnicodeVersion string = "15.0.0"
import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// Language-specific uppercasing
upper := cases.Upper(language.English)
result := upper.String("hello world") // "HELLO WORLD"
// Turkish uppercasing (i → İ, not I)
turkishUpper := cases.Upper(language.Turkish)
result = turkishUpper.String("istanbul") // "İSTANBUL"
// Lowercasing
lower := cases.Lower(language.English)
result = lower.String("HELLO WORLD") // "hello world"
// Title casing (capitalize first letter of each word)
title := cases.Title(language.English)
result = title.String("hello world") // "Hello World"
// Title with NoLower option (don't lowercase other letters)
titleNoLower := cases.Title(language.English, cases.NoLower)
result = titleNoLower.String("hello WORLD") // "Hello WORLD"
// Case folding (for case-insensitive comparison)
fold := cases.Fold()
result = fold.String("Hello WORLD") // normalized case
// Transform bytes
upperBytes := upper.Bytes([]byte("hello"))
// Use with transform.Chain
import "golang.org/x/text/transform"
combined := transform.Chain(
cases.Lower(language.English),
// ... other transformers
)
// Greek final sigma handling
greekCaser := cases.Lower(language.Greek, cases.HandleFinalSigma(true))
result = greekCaser.String("ΜΆΣΣΟΣ") // "μάσσος" (the word-final sigma is rendered as ς)
Import path: golang.org/x/text/runes
Provides transforms for UTF-8 encoded text at the rune level.
// Set is a collection of runes
type Set interface {
Contains(r rune) bool
}
// In creates a Set with runes in the given RangeTable
func In(rt *unicode.RangeTable) Set
// NotIn creates a Set with runes not in the given RangeTable
func NotIn(rt *unicode.RangeTable) Set
// Predicate creates a Set with a Contains method that returns f(r)
func Predicate(f func(rune) bool) Set
// Transformer implements transform.Transformer
type Transformer struct{}
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t Transformer) Span(b []byte, atEOF bool) (n int, err error)
func (t Transformer) Reset()
func (t Transformer) String(s string) string
func (t Transformer) Bytes(b []byte) []byte
// Map returns a Transformer that maps runes using the given mapping
func Map(mapping func(rune) rune) Transformer
// Remove returns a Transformer that removes runes r for which s.Contains(r)
func Remove(s Set) Transformer
// If returns a transformer that applies tIn to consecutive runes for which
// s.Contains(r) and tNotIn for consecutive runes for which !s.Contains(r)
func If(s Set, tIn, tNotIn transform.Transformer) Transformer
// ReplaceIllFormed returns a transformer that replaces all invalid UTF-8
// sequences with utf8.RuneError
func ReplaceIllFormed() Transformer
import (
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
)
// Remove all non-letter runes. runes.NotIn selects every rune outside
// unicode.Letter, so Remove drops those and keeps only the letters.
removeNonLetters := runes.Remove(runes.NotIn(unicode.Letter))
result := removeNonLetters.String("Hello, 123 World!") // "HelloWorld"
// Map runes with a function
toUpper := runes.Map(func(r rune) rune {
return unicode.ToUpper(r)
})
result = toUpper.String("hello") // "HELLO"
// Remove specific runes
removePunctuation := runes.Remove(runes.In(unicode.Punct))
result = removePunctuation.String("Hello, World!") // "Hello World"
// Keep only specific runes
keepDigits := runes.Remove(runes.NotIn(unicode.Digit))
result = keepDigits.String("Price: $123.45") // "12345"
// Use predicate for complex conditions
removeVowels := runes.Remove(runes.Predicate(func(r rune) bool {
return r == 'a' || r == 'e' || r == 'i' || r == 'o' || r == 'u'
}))
result = removeVowels.String("hello world") // "hll wrld"
// Conditional transformation
onlyUppercaseLetters := runes.If(
runes.In(unicode.Letter),
runes.Map(unicode.ToUpper),
runes.Remove(runes.Predicate(func(r rune) bool { return true })),
)
result = onlyUppercaseLetters.String("Hello 123 World!") // "HELLOWORLD"
// Replace ill-formed UTF-8
fixUTF8 := runes.ReplaceIllFormed()
result = fixUTF8.String(invalidUTF8String)
// Chain with other transformers
import "golang.org/x/text/unicode/norm"
normalized := transform.Chain(
norm.NFD,
runes.Remove(runes.In(unicode.Mn)), // Remove combining marks
norm.NFC,
)
result, _, _ = transform.String(normalized, "Café") // "Cafe"
Import path: golang.org/x/text/width
Provides functionality for handling different character widths (half-width/full-width conversions).
// Kind indicates the type of width property
type Kind int
const (
Neutral Kind = iota // Do not occur in legacy East Asian character sets
EastAsianAmbiguous // Can be sometimes wide and sometimes narrow
EastAsianWide // Wide in its usual form
EastAsianNarrow // Narrow in its usual form
EastAsianFullwidth // Compatibility decomposition of type wide
EastAsianHalfwidth // Compatibility decomposition of type narrow
)
func (k Kind) String() string
// Properties provides access to width properties of a rune
type Properties struct{}
func Lookup(b []byte) (p Properties, size int)
func LookupString(s string) (p Properties, size int)
func LookupRune(r rune) Properties
func (p Properties) Kind() Kind
func (p Properties) Narrow() rune
func (p Properties) Wide() rune
func (p Properties) Folded() rune
// Transformer implements transform.Transformer
type Transformer struct{}
func (t Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t Transformer) Span(src []byte, atEOF bool) (n int, err error)
func (t Transformer) Reset()
func (t Transformer) String(s string) string
func (t Transformer) Bytes(b []byte) []byte
// Fold maps all runes to their canonical width
var Fold Transformer
// Widen maps runes to their wide variant, if available
var Widen Transformer
// Narrow maps runes to their narrow variant, if available
var Narrow Transformer
const UnicodeVersion string = "15.0.0"
import "golang.org/x/text/width"
// Convert to full-width
result := width.Widen.String("ABC123") // "ＡＢＣ１２３"
// Convert to half-width (the input here is full-width text)
result = width.Narrow.String("ＡＢＣ１２３") // "ABC123"
// Fold to canonical width (full-width Latin letters and digits fold to narrow)
result = width.Fold.String("ＡＢＣ１２３") // "ABC123"
// Transform bytes
widened := width.Widen.Bytes([]byte("Hello"))
// Look up width properties
props, size := width.LookupString("A")
kind := props.Kind() // EastAsianNarrow
wide := props.Wide() // 'Ａ' (full-width variant)
narrow := props.Narrow() // 0: the rune is already narrow, so there is no narrow variant to return
folded := props.Folded() // 0: the rune is already in its canonical form
// Check specific characters
props = width.LookupRune('Ａ') // Full-width A (U+FF21)
if props.Kind() == width.EastAsianFullwidth {
	// Assign rather than redeclare so the outer narrow is not shadowed.
	narrow = props.Narrow() // 'A'
}
// Use with transform.Chain
import "golang.org/x/text/transform"
normalized := transform.Chain(
width.Fold,
// ... other transformers
)
import (
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/cases"
"golang.org/x/text/runes"
"golang.org/x/text/width"
"unicode"
)
// Normalize text for comparison
func normalizeForComparison(s string) string {
t := transform.Chain(
// Normalize Unicode
norm.NFD,
// Remove diacritics
runes.Remove(runes.In(unicode.Mn)),
norm.NFC,
// Normalize width
width.Fold,
// Case fold
cases.Fold(),
)
result, _, _ := transform.String(t, s)
return result
}import (
"unicode"
"golang.org/x/text/runes"
"golang.org/x/text/transform"
)
// Remove control characters and keep only printable text
func sanitizeInput(s string) string {
t := runes.Remove(runes.Predicate(func(r rune) bool {
return unicode.IsControl(r) && !unicode.IsSpace(r)
}))
result, _, _ := transform.String(t, s)
return result
}
// Keep only alphanumeric and basic punctuation
func keepSafeCharacters(s string) string {
safe := runes.Predicate(func(r rune) bool {
return unicode.IsLetter(r) ||
unicode.IsNumber(r) ||
unicode.IsSpace(r) ||
r == '.' || r == ',' || r == '!' || r == '?'
})
t := runes.Remove(runes.Predicate(func(r rune) bool {
return !safe.Contains(r)
}))
result, _, _ := transform.String(t, s)
return result
}import "golang.org/x/text/runes"
// rot13 applies the ROT13 substitution to s: each ASCII letter is rotated
// 13 places within its own case, and every other rune is passed through.
func rot13(s string) string {
	rotate := func(r, base rune) rune { return base + (r-base+13)%26 }
	cipher := runes.Map(func(r rune) rune {
		switch {
		case 'a' <= r && r <= 'z':
			return rotate(r, 'a')
		case 'A' <= r && r <= 'Z':
			return rotate(r, 'A')
		default:
			return r
		}
	})
	return cipher.String(s)
}
// Obfuscate text by replacing vowels with asterisks
func obfuscateVowels(s string) string {
t := runes.Map(func(r rune) rune {
vowels := "aeiouAEIOU"
for _, v := range vowels {
if r == v {
return '*'
}
}
return r
})
return t.String(s)
}import (
"golang.org/x/text/cases"
"golang.org/x/text/language"
)
// processText title-cases text using the case-mapping rules for lang.
func processText(text string, lang language.Tag) string {
	return cases.Title(lang).String(text)
}
// Example: Turkish vs English
text := "istanbul"
enTitle := cases.Title(language.English).String(text) // "Istanbul"
trTitle := cases.Title(language.Turkish).String(text) // "İstanbul"
import (
"io"
"golang.org/x/text/transform"
"golang.org/x/text/unicode/norm"
"golang.org/x/text/width"
)
// processLargeFile streams input to output through an NFC-normalizing,
// width-folding transformer. It returns the error reported by io.Copy, if any.
func processLargeFile(input io.Reader, output io.Writer) error {
	pipeline := transform.Chain(norm.NFC, width.Fold)
	if _, err := io.Copy(output, transform.NewReader(input, pipeline)); err != nil {
		return err
	}
	return nil
}
// Write transformed data
func writeTransformed(w io.Writer, data []byte) error {
t := transform.Chain(
norm.NFC,
width.Fold,
)
writer := transform.NewWriter(w, t)
defer writer.Close()
_, err := writer.Write(data)
return err
}import "golang.org/x/text/transform"
// spaceToUnderscore is a transformer that rewrites every ASCII space byte as
// an underscore. It embeds transform.NopResetter to supply its Reset method.
type spaceToUnderscore struct {
	transform.NopResetter
}

// Transform copies src into dst one byte at a time, substituting '_' for ' '.
// It returns transform.ErrShortDst, together with the counts written and
// consumed so far, when dst fills up before src is exhausted. atEOF is unused.
func (t spaceToUnderscore) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
	for _, b := range src {
		if nDst >= len(dst) {
			return nDst, nSrc, transform.ErrShortDst
		}
		if b == ' ' {
			b = '_'
		}
		dst[nDst] = b
		nDst++
		nSrc++
	}
	return nDst, nSrc, nil
}
// Use the custom transformer
func convertSpaces(s string) string {
t := spaceToUnderscore{}
result, _, _ := transform.String(t, s)
return result
}import (
"golang.org/x/text/width"
"strings"
)
// normalizeWidth returns s with width.Fold applied to it.
func normalizeWidth(s string) string {
	folded := width.Fold.String(s)
	return folded
}
// hasFullWidth reports whether any rune in s has the width kind
// EastAsianFullwidth or EastAsianWide.
func hasFullWidth(s string) bool {
	for _, r := range s {
		switch width.LookupRune(r).Kind() {
		case width.EastAsianFullwidth, width.EastAsianWide:
			return true
		}
	}
	return false
}
// toFullWidth returns s with width.Widen applied to it.
func toFullWidth(s string) string {
	widened := width.Widen.String(s)
	return widened
}
func toHalfWidth(s string) string {
return width.Narrow.String(s)
}Based on: