This document covers language-sensitive search/matching and security-related text processing including PRECIS profiles and the Bidi Rule.
Import path: golang.org/x/text/search
Provides language-specific search and string matching based on collation rules.
// Matcher implements language-specific string matching
type Matcher struct{}
func New(t language.Tag, opts ...Option) *Matcher
// IndexString reports the start and end position of first occurrence of pat in s
func (m *Matcher) IndexString(s, pat string, opts ...IndexOption) (start, end int)
// Index reports the start and end position of first occurrence of pat in b
func (m *Matcher) Index(b, pat []byte, opts ...IndexOption) (start, end int)
// EqualString reports whether a and b are equivalent
func (m *Matcher) EqualString(a, b string) bool
// Equal reports whether a and b are equivalent
func (m *Matcher) Equal(a, b []byte) bool
// Pattern is a compiled search string (safe for concurrent use)
type Pattern struct{}
// IndexString reports the start and end position of first occurrence of p in s
func (p *Pattern) IndexString(s string, opts ...IndexOption) (start, end int)
// Index reports the start and end position of first occurrence of p in b
func (p *Pattern) Index(b []byte, opts ...IndexOption) (start, end int)
// CompileString compiles and returns a pattern for faster searching
func (m *Matcher) CompileString(s string) *Pattern
// Compile compiles and returns a pattern for faster searching
func (m *Matcher) Compile(b []byte) *Pattern
// Option configures a Matcher
type Option func(*Matcher)
// Predefined options
var WholeWord Option // Restricts matches to complete words
var Exact Option // Requires exact equivalence
var Loose Option // Ignores case, diacritics and width
var IgnoreCase Option // Enables case-insensitive search
var IgnoreDiacritics Option // Causes diacritics to be ignored ("ö" == "o")
var IgnoreWidth Option // Equates narrow with wide variants
// IndexOption specifies how Index methods should match input
type IndexOption byte
const (
Anchor IndexOption = 1 << iota // Restricts search to start (or end for Backwards)
Backwards // Starts search from the end of text
)
// Supported lists languages for which search differs from parent
var Supported language.Coverage
const CLDRVersion string = "23"
const UnicodeVersion string = "6.2.0"

import (
"golang.org/x/text/language"
"golang.org/x/text/search"
)
// Basic usage of the search package. The reported start/end values are byte
// offsets into the searched string, so non-ASCII characters count for more
// than one position.
// Create a matcher for a language
m := search.New(language.English)
// Search for substring
start, end := m.IndexString("Hello World", "world")
// start=-1, end=-1 (case-sensitive by default)
// Case-insensitive search
m = search.New(language.English, search.IgnoreCase)
start, end = m.IndexString("Hello World", "world")
// start=6, end=11
// Ignore diacritics
m = search.New(language.English, search.IgnoreDiacritics)
start, end = m.IndexString("café", "cafe")
// start=0, end=5 (byte offsets: a precomposed "é" occupies 2 bytes in UTF-8)
// Loose search (ignore case, diacritics, width)
m = search.New(language.English, search.Loose)
start, end = m.IndexString("Café", "cafe")
// start=0, end=5 (byte offsets: a precomposed "é" occupies 2 bytes in UTF-8)
// Whole word matching
// NOTE(review): WholeWord may be an unimplemented placeholder in some x/text
// releases — confirm that search.New accepts it before relying on this example.
m = search.New(language.English, search.WholeWord, search.IgnoreCase)
start, end = m.IndexString("Hello World", "world")
// start=6, end=11 (matches whole word)
start, end = m.IndexString("Worldwide", "world")
// start=-1, end=-1 (not a whole word)
// Exact matching
// NOTE(review): Exact may likewise be an unimplemented placeholder — confirm
// against the x/text version in use.
m = search.New(language.English, search.Exact)
start, end = m.IndexString("Café", "cafe")
// start=-1, end=-1 (exact match required)
// Compile pattern for repeated searches
m = search.New(language.English, search.IgnoreCase)
pattern := m.CompileString("world")
start, end = pattern.IndexString("Hello World")
// start=6, end=11
// Anchor to start
start, end = pattern.IndexString("World is big", search.Anchor)
// start=0, end=5
// Search backwards
// NOTE(review): verify that the Backwards option is supported by the x/text
// version in use; the values below are the intended result.
start, end = pattern.IndexString("World loves World", search.Backwards)
// start=12, end=17 (last occurrence)
// Check equality
m = search.New(language.English, search.Loose)
equal := m.EqualString("Café", "cafe") // true
// Search in bytes
start, end = m.Index([]byte("café"), []byte("cafe"))

Import path: golang.org/x/text/secure/precis
Implements PRECIS (Preparation, Enforcement, and Comparison of Internationalized Strings) as defined in RFC 8264. Used for preparing usernames, passwords, and other secure text.
BE ADVISED: This package is under construction and the API may change.
// Profile represents a set of rules for normalizing and validating strings
type Profile struct{}
// Nickname profile (RFC 8266)
var Nickname *Profile
// UsernameCaseMapped profile (RFC 8265)
var UsernameCaseMapped *Profile
// UsernameCasePreserved profile (RFC 8265)
var UsernameCasePreserved *Profile
// OpaqueString profile for passwords and secure labels (RFC 8265)
var OpaqueString *Profile
// NewFreeform creates a PRECIS profile based on the Freeform string class
func NewFreeform(opts ...Option) *Profile
// NewIdentifier creates a PRECIS profile based on the Identifier string class
func NewIdentifier(opts ...Option) *Profile
// NewRestrictedProfile creates a PRECIS profile based on an existing profile
func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile
// String returns a string with the result of applying the profile
func (p *Profile) String(s string) (string, error)
// Bytes returns a new byte slice with the result of applying the profile
func (p *Profile) Bytes(b []byte) ([]byte, error)
// Append appends the result of applying p to src, writing to dst
func (p *Profile) Append(dst, src []byte) ([]byte, error)
// Compare enforces both strings and compares them for bit-string identity
func (p *Profile) Compare(a, b string) bool
// CompareKey returns a string that can be used for comparison, hashing, or collation
func (p *Profile) CompareKey(s string) (string, error)
// AppendCompareKey appends the result of applying p to src
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error)
// Allowed returns a runes.Set containing every rune in the profile's string class
func (p *Profile) Allowed() runes.Set
// NewTransformer creates a transform.Transformer for PRECIS preparation
func (p *Profile) NewTransformer() *Transformer
// Option is used to define the behavior and rules of a Profile
type Option func(*options)
// Predefined options
var IgnoreCase Option // Performs case insensitive comparison
var FoldWidth Option // Maps non-canonical wide and narrow variants
var DisallowEmpty Option // Returns error if resulting string would be empty
var BidiRule Option // Applies Bidi Rule defined in RFC 5893
// Option functions
func FoldCase(opts ...cases.Option) Option
func LowerCase(opts ...cases.Option) Option
func Norm(f norm.Form) Option
func AdditionalMapping(t ...func() transform.Transformer) Option
func Disallow(set runes.Set) Option
// Transformer implements transform.Transformer
type Transformer struct{}
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t *Transformer) Reset()
func (t *Transformer) String(s string) string
func (t *Transformer) Bytes(b []byte) []byte
const UnicodeVersion string = "15.0.0"

import (
"golang.org/x/text/secure/precis"
)
// Basic usage of the precis package. Profile.String applies the profile to
// its input and returns the resulting string together with an error.
// Username validation (case-mapped): the result is lowercased
username, err := precis.UsernameCaseMapped.String("User@Example.Com")
// username = "user@example.com", err = nil
// Username validation (case-preserved): the original case is kept
username, err = precis.UsernameCasePreserved.String("User@Example.Com")
// username = "User@Example.Com", err = nil
// Nickname validation: surrounding spaces are removed
nickname, err := precis.Nickname.String(" Alice ")
// nickname = "Alice", err = nil
// Password/opaque string enforcement
password, err := precis.OpaqueString.String("MyP@ssw0rd")
// password = "MyP@ssw0rd", err = nil
// Compare usernames: both inputs are enforced, then compared for identity
match := precis.UsernameCaseMapped.Compare("User@Example.Com", "user@example.com")
// match = true
// Create custom profile
// (in a real Go file these two imports belong in the import block at the top)
import "golang.org/x/text/runes"
import "unicode"
customProfile := precis.NewIdentifier(
precis.IgnoreCase,
precis.Disallow(runes.In(unicode.Space)),
)
value, err := customProfile.String("MyIdentifier")
// Get comparison key for hashing
key, err := precis.UsernameCaseMapped.CompareKey("User@Example.Com")
// Use key for map keys or database lookups
// Create restricted profile: the parent profile plus an extra set of
// disallowed runes
restricted := precis.NewRestrictedProfile(
precis.UsernameCaseMapped,
runes.Predicate(func(r rune) bool {
return r == '@' // Disallow @ symbol
}),
)
// Use with transformer
t := precis.UsernameCaseMapped.NewTransformer()
result := t.String("User@Example.Com")

Import path: golang.org/x/text/secure/bidirule
Implements the Bidi Rule defined by RFC 5893, used for validating internationalized domain names and other bidirectional text in security contexts.
NOTE: This package is under development and may change without preserving backward compatibility.
// Valid reports whether b conforms to the BiDi rule
func Valid(b []byte) bool
// ValidString reports whether s conforms to the BiDi rule
func ValidString(s string) bool
// Direction reports the direction of the given label as defined by RFC 5893
func Direction(b []byte) bidi.Direction
// DirectionString reports the direction of the given label
func DirectionString(s string) bidi.Direction
// Transformer verifies that input adheres to the Bidi Rule
type Transformer struct{}
func New() *Transformer
func (t *Transformer) Reset()
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error)
// ErrInvalid indicates a label is invalid according to the Bidi Rule
var ErrInvalid error

import (
"golang.org/x/text/secure/bidirule"
"golang.org/x/text/unicode/bidi"
)
// Basic usage of the bidirule package: ValidString/Valid report whether a
// label conforms to the Bidi Rule, DirectionString reports its direction.
// Validate domain label
valid := bidirule.ValidString("example") // true
valid = bidirule.ValidString("مثال") // true (Arabic, valid RTL)
valid = bidirule.ValidString("exam-مثال-ple") // false (mixed LTR/RTL)
// Get direction
dir := bidirule.DirectionString("example") // bidi.LeftToRight
dir = bidirule.DirectionString("مثال") // bidi.RightToLeft
// Validate bytes (same check as ValidString, for a []byte input)
valid = bidirule.Valid([]byte("example"))
// Use transformer for validation
t := bidirule.New()
// (in a real Go file this import belongs in the import block at the top)
import "golang.org/x/text/transform"
result, _, err := transform.String(t, "example")
if err == bidirule.ErrInvalid {
// Invalid according to Bidi Rule
}
// Span to check how much is valid
n, err := t.Span([]byte("example"), true)
if err != nil {
// First n bytes are valid
}

import (
"golang.org/x/text/language"
"golang.org/x/text/search"
)
// searchAndReplace returns a copy of text in which every occurrence of pattern
// is replaced by replace. Matching uses the collation rules of lang and ignores
// case and diacritics.
//
// The second parameter is named pattern rather than search: a parameter called
// search would shadow the imported search package and make search.New
// unresolvable inside the function. Go has no named arguments, so callers are
// unaffected by the rename.
func searchAndReplace(text, pattern, replace string, lang language.Tag) string {
	m := search.New(lang, search.IgnoreCase, search.IgnoreDiacritics)
	// Compile once; the same pattern is applied to every remaining suffix.
	compiled := m.CompileString(pattern)

	// Accumulate into a single buffer instead of repeated string
	// concatenation, which is quadratic in the number of matches.
	out := make([]byte, 0, len(text))
	pos := 0
	for pos < len(text) {
		// start and end are byte offsets relative to text[pos:].
		start, end := compiled.IndexString(text[pos:])
		if start == -1 || end <= start {
			// No further match. A zero-width match is treated the same way,
			// because it would never advance pos and the loop would not end.
			break
		}
		out = append(out, text[pos:pos+start]...)
		out = append(out, replace...)
		pos += end
	}
	// Copy whatever follows the last match.
	out = append(out, text[pos:]...)
	return string(out)
}
// Usage
text := "Café and café"
result := searchAndReplace(text, "cafe", "coffee", language.English)
// "coffee and coffee"

import (
"errors"
"golang.org/x/text/secure/precis"
)
// validateUsername enforces the PRECIS UsernameCaseMapped profile on username
// and then applies a length policy of 3 to 32 characters.
//
// It returns the enforced username on success. It returns an empty string and
// an error when the profile rejects the input or the length is out of range.
func validateUsername(username string) (string, error) {
	normalized, err := precis.UsernameCaseMapped.String(username)
	if err != nil {
		return "", err
	}

	// Count characters (runes), not bytes: len() on a string is its UTF-8 byte
	// length, which would let a two-character non-ASCII name pass the minimum
	// and reject non-ASCII names well below 32 characters.
	chars := 0
	for range normalized {
		chars++
	}

	switch {
	case chars < 3:
		return "", errors.New("username too short")
	case chars > 32:
		return "", errors.New("username too long")
	}
	return normalized, nil
}
// Usage
username, err := validateUsername("Alice@Example.Com")
// username = "alice@example.com"

import (
"errors"
"golang.org/x/text/secure/precis"
)
func validatePassword(password string) error {
// Apply PRECIS OpaqueString profile
_, err := precis.OpaqueString.String(password)
if err != nil {
return err
}
// Additional validation rules
if len(password) < 8 {
return errors.New("password too short")
}
// Check for required character types
// (implementation depends on requirements)
return nil
}import (
"golang.org/x/text/language"
"golang.org/x/text/search"
)
// Match describes one occurrence of a pattern inside a searched text.
type Match struct {
	// Start and End are the offsets of the match within the searched text,
	// such that text[Start:End] is the matched portion.
	Start, End int
	// Text is the matched portion of the searched text.
	Text string
}
// findAllMatches returns every non-overlapping, case-insensitive occurrence of
// pattern in text, in order of appearance, using the collation rules of lang.
//
// Start and End of each Match are byte offsets into text. The result is nil
// when there is no match.
func findAllMatches(text, pattern string, lang language.Tag) []Match {
	compiled := search.New(lang, search.IgnoreCase).CompileString(pattern)

	var matches []Match
	for offset := 0; offset < len(text); {
		// start and end are relative to text[offset:].
		start, end := compiled.IndexString(text[offset:])
		if start == -1 || end <= start {
			// Done. A zero-width match also stops the scan: it would never
			// advance offset, so the loop would append matches forever.
			break
		}
		matches = append(matches, Match{
			Start: offset + start,
			End:   offset + end,
			Text:  text[offset+start : offset+end],
		})
		// Resume right after this match so matches never overlap.
		offset += end
	}
	return matches
}
// Highlight matches in HTML
func highlightMatches(text, pattern string, lang language.Tag) string {
matches := findAllMatches(text, pattern, lang)
if len(matches) == 0 {
return text
}
var result string
lastEnd := 0
for _, match := range matches {
result += text[lastEnd:match.Start]
result += "<mark>" + match.Text + "</mark>"
lastEnd = match.End
}
result += text[lastEnd:]
return result
}import (
"errors"
"strings"

"golang.org/x/text/secure/bidirule"
"golang.org/x/text/secure/precis"
)
// validateIDN normalizes and validates a dot-separated domain name label by
// label. Each label is enforced with the PRECIS UsernameCaseMapped profile and
// must then satisfy the Bidi Rule.
//
// It returns the labels re-joined with "." on success. It returns an empty
// string and an error as soon as one label is rejected by the profile or
// violates the Bidi Rule.
//
// This function uses errors.New, so the errors package must be present in the
// file's import block.
//
// NOTE(review): UsernameCaseMapped is a username profile; confirm it is the
// intended rule set for domain labels before using this for real IDN handling.
func validateIDN(domain string) (string, error) {
	labels := strings.Split(domain, ".")
	for i, label := range labels {
		normalized, err := precis.UsernameCaseMapped.String(label)
		if err != nil {
			return "", err
		}
		if !bidirule.ValidString(normalized) {
			return "", errors.New("label violates Bidi Rule")
		}
		// labels is local to this call, so it can be overwritten in place
		// instead of allocating a second slice.
		labels[i] = normalized
	}
	return strings.Join(labels, "."), nil
}
// Usage
domain, err := validateIDN("example.مثال")

import (
"crypto/subtle"
"golang.org/x/text/secure/precis"
)
func secureCompareUsernames(a, b string) bool {
// Normalize both usernames
normA, err := precis.UsernameCaseMapped.CompareKey(a)
if err != nil {
return false
}
normB, err := precis.UsernameCaseMapped.CompareKey(b)
if err != nil {
return false
}
// Use constant-time comparison
return subtle.ConstantTimeCompare([]byte(normA), []byte(normB)) == 1
}import (
"golang.org/x/text/language"
"golang.org/x/text/search"
)
func searchWithFallback(text, pattern string, preferredLang language.Tag) (start, end int) {
// Try preferred language
m := search.New(preferredLang, search.IgnoreCase)
start, end = m.IndexString(text, pattern)
if start != -1 {
return start, end
}
// Fall back to language-neutral search
m = search.New(language.Und, search.IgnoreCase)
return m.IndexString(text, pattern)
}import (
"golang.org/x/text/language"
"golang.org/x/text/search"
"golang.org/x/text/secure/precis"
)
// SearchIndex is an in-memory collection of documents that can be searched
// with language-sensitive matching.
type SearchIndex struct {
	// Both maps are keyed by document id: documents holds the content as it
	// was added, normalized holds the normalized content used for searching.
	documents, normalized map[string]string
	// lang is the language tag handed to the matcher when searching.
	lang language.Tag
}
// NewSearchIndex returns an empty SearchIndex for lang, with both of its maps
// initialized.
func NewSearchIndex(lang language.Tag) *SearchIndex {
	idx := new(SearchIndex)
	idx.lang = lang
	idx.documents = map[string]string{}
	idx.normalized = map[string]string{}
	return idx
}
// Add stores content under id, together with a normalized copy produced by a
// Freeform PRECIS profile with the IgnoreCase and FoldWidth options. If the
// profile rejects the content, the original text is stored as the searchable
// copy instead. The returned error is always nil.
func (idx *SearchIndex) Add(id, content string) error {
	// Start from the raw text and replace it only when normalization succeeds.
	searchable := content
	folded, err := precis.NewFreeform(precis.IgnoreCase, precis.FoldWidth).String(content)
	if err == nil {
		searchable = folded
	}
	idx.documents[id], idx.normalized[id] = content, searchable
	return nil
}
func (idx *SearchIndex) Search(query string) []string {
// Normalize query
profile := precis.NewFreeform(precis.IgnoreCase, precis.FoldWidth)
normalizedQuery, _ := profile.String(query)
m := search.New(idx.lang, search.Loose)
var results []string
for id, content := range idx.normalized {
start, _ := m.IndexString(content, normalizedQuery)
if start != -1 {
results = append(results, id)
}
}
return results
}Based on: