or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

collation.md encoding.md formatting.md index.md language.md localization.md search-and-security.md text-transformation.md unicode.md
tile.json

docs/search-and-security.md

Search and Security

This document covers language-sensitive search/matching and security-related text processing including PRECIS profiles and the Bidi Rule.

Package Overview

  • search: Language-sensitive string search and matching
  • secure/precis: PRECIS framework for secure string preparation
  • secure/bidirule: Bidi Rule validation for internationalized domain names

Search Package

Import path: golang.org/x/text/search

Provides language-specific search and string matching based on collation rules.

Matcher Type

// Matcher implements language-specific string matching
type Matcher struct{}

func New(t language.Tag, opts ...Option) *Matcher

Matcher Methods

// IndexString reports the start and end position of first occurrence of pat in s
func (m *Matcher) IndexString(s, pat string, opts ...IndexOption) (start, end int)

// Index reports the start and end position of first occurrence of pat in b
func (m *Matcher) Index(b, pat []byte, opts ...IndexOption) (start, end int)

// EqualString reports whether a and b are equivalent
func (m *Matcher) EqualString(a, b string) bool

// Equal reports whether a and b are equivalent
func (m *Matcher) Equal(a, b []byte) bool

Pattern Type

// Pattern is a compiled search string (safe for concurrent use)
type Pattern struct{}

// IndexString reports the start and end position of first occurrence of p in s
func (p *Pattern) IndexString(s string, opts ...IndexOption) (start, end int)

// Index reports the start and end position of first occurrence of p in b
func (p *Pattern) Index(b []byte, opts ...IndexOption) (start, end int)

Matcher Pattern Compilation

// CompileString compiles and returns a pattern for faster searching
func (m *Matcher) CompileString(s string) *Pattern

// Compile compiles and returns a pattern for faster searching
func (m *Matcher) Compile(b []byte) *Pattern

Option Type

// Option configures a Matcher
type Option func(*Matcher)

// Predefined options
var WholeWord Option       // Restricts matches to complete words
var Exact Option           // Requires exact equivalence
var Loose Option           // Ignores case, diacritics and width
var IgnoreCase Option      // Enables case-insensitive search
var IgnoreDiacritics Option // Causes diacritics to be ignored ("ö" == "o")
var IgnoreWidth Option     // Equates narrow with wide variants

IndexOption Type

// IndexOption specifies how Index methods should match input
type IndexOption byte

const (
    Anchor    IndexOption = 1 << iota // Restricts search to start (or end for Backwards)
    Backwards                          // Starts search from the end of text
)

Variables

// Supported lists languages for which search differs from parent
var Supported language.Coverage

Constants

const CLDRVersion string = "23"
const UnicodeVersion string = "6.2.0"

Usage Examples

import (
    "golang.org/x/text/language"
    "golang.org/x/text/search"
)

// Create a matcher for a language
m := search.New(language.English)

// Search for substring
start, end := m.IndexString("Hello World", "world")
// start=-1, end=-1 (case-sensitive by default)

// Case-insensitive search
m = search.New(language.English, search.IgnoreCase)
start, end = m.IndexString("Hello World", "world")
// start=6, end=11

// Ignore diacritics
m = search.New(language.English, search.IgnoreDiacritics)
start, end = m.IndexString("café", "cafe")
// start=0, end=4

// Loose search (ignore case, diacritics, width)
m = search.New(language.English, search.Loose)
start, end = m.IndexString("Café", "cafe")
// start=0, end=4

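// Whole word matching
// NOTE(review): verify against the installed x/text version — upstream appears to
// declare search.WholeWord as a nil Option, in which case passing it to New panics.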
m = search.New(language.English, search.WholeWord, search.IgnoreCase)
start, end = m.IndexString("Hello World", "world")
// start=6, end=11 (matches whole word)
start, end = m.IndexString("Worldwide", "world")
// start=-1, end=-1 (not a whole word)

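// Exact matching
// NOTE(review): verify against the installed x/text version — upstream appears to
// declare search.Exact as a nil Option, in which case passing it to New panics.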
m = search.New(language.English, search.Exact)
start, end = m.IndexString("Café", "cafe")
// start=-1, end=-1 (exact match required)

// Compile pattern for repeated searches
m = search.New(language.English, search.IgnoreCase)
pattern := m.CompileString("world")
start, end = pattern.IndexString("Hello World")
// start=6, end=11

// Anchor to start
start, end = pattern.IndexString("World is big", search.Anchor)
// start=0, end=5

// Search backwards
start, end = pattern.IndexString("World loves World", search.Backwards)
// start=12, end=17 (last occurrence)

// Check equality
m = search.New(language.English, search.Loose)
equal := m.EqualString("Café", "cafe") // true

// Search in bytes
start, end = m.Index([]byte("café"), []byte("cafe"))

Secure PRECIS Package

Import path: golang.org/x/text/secure/precis

Implements PRECIS (Preparation, Enforcement, and Comparison of Internationalized Strings) as defined in RFC 8264. Used for preparing usernames, passwords, and other secure text.

BE ADVISED: This package is under construction and the API may change.

Profile Type

// Profile represents a set of rules for normalizing and validating strings
type Profile struct{}

Predefined Profiles

// Nickname profile (RFC 8266)
var Nickname *Profile

// UsernameCaseMapped profile (RFC 8265)
var UsernameCaseMapped *Profile

// UsernameCasePreserved profile (RFC 8265)
var UsernameCasePreserved *Profile

// OpaqueString profile for passwords and secure labels (RFC 8265)
var OpaqueString *Profile

Profile Constructors

// NewFreeform creates a PRECIS profile based on the Freeform string class
func NewFreeform(opts ...Option) *Profile

// NewIdentifier creates a PRECIS profile based on the Identifier string class
func NewIdentifier(opts ...Option) *Profile

// NewRestrictedProfile creates a PRECIS profile based on an existing profile
func NewRestrictedProfile(parent *Profile, disallow runes.Set) *Profile

Profile Methods

// String returns a string with the result of applying the profile
func (p *Profile) String(s string) (string, error)

// Bytes returns a new byte slice with the result of applying the profile
func (p *Profile) Bytes(b []byte) ([]byte, error)

// Append appends the result of applying p to src, writing to dst
func (p *Profile) Append(dst, src []byte) ([]byte, error)

// Compare enforces both strings and compares them for bit-string identity
func (p *Profile) Compare(a, b string) bool

// CompareKey returns a string that can be used for comparison, hashing, or collation
func (p *Profile) CompareKey(s string) (string, error)

// AppendCompareKey appends the result of applying p to src
func (p *Profile) AppendCompareKey(dst, src []byte) ([]byte, error)

// Allowed returns a runes.Set containing every rune in the profile's string class
func (p *Profile) Allowed() runes.Set

// NewTransformer creates a transform.Transformer for PRECIS preparation
func (p *Profile) NewTransformer() *Transformer

Option Type

// Option is used to define the behavior and rules of a Profile
type Option func(*options)

// Predefined options
var IgnoreCase Option     // Performs case insensitive comparison
var FoldWidth Option      // Maps non-canonical wide and narrow variants
var DisallowEmpty Option  // Returns error if resulting string would be empty
var BidiRule Option       // Applies Bidi Rule defined in RFC 5893

// Option functions
func FoldCase(opts ...cases.Option) Option
func LowerCase(opts ...cases.Option) Option
func Norm(f norm.Form) Option
func AdditionalMapping(t ...func() transform.Transformer) Option
func Disallow(set runes.Set) Option

Transformer Type

// Transformer implements transform.Transformer
type Transformer struct{}

func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t *Transformer) Reset()
func (t *Transformer) String(s string) string
func (t *Transformer) Bytes(b []byte) []byte

Constants

const UnicodeVersion string = "15.0.0"

Usage Examples

import (
    "golang.org/x/text/secure/precis"
)

// Username validation (case-mapped)
username, err := precis.UsernameCaseMapped.String("User@Example.Com")
// username = "user@example.com", err = nil

// Username validation (case-preserved)
username, err = precis.UsernameCasePreserved.String("User@Example.Com")
// username = "User@Example.Com", err = nil

// Nickname validation
nickname, err := precis.Nickname.String("  Alice  ")
// nickname = "Alice", err = nil

// Password/opaque string enforcement
password, err := precis.OpaqueString.String("MyP@ssw0rd")
// password = "MyP@ssw0rd", err = nil

// Compare usernames
match := precis.UsernameCaseMapped.Compare("User@Example.Com", "user@example.com")
// match = true

// Create custom profile
import "golang.org/x/text/runes"
import "unicode"

customProfile := precis.NewIdentifier(
    precis.IgnoreCase,
    precis.Disallow(runes.In(unicode.Space)),
)

value, err := customProfile.String("MyIdentifier")

// Get comparison key for hashing
key, err := precis.UsernameCaseMapped.CompareKey("User@Example.Com")
// Use key for map keys or database lookups

// Create restricted profile
restricted := precis.NewRestrictedProfile(
    precis.UsernameCaseMapped,
    runes.Predicate(func(r rune) bool {
        return r == '@' // Disallow @ symbol
    }),
)

// Use with transformer
t := precis.UsernameCaseMapped.NewTransformer()
result := t.String("User@Example.Com")

Secure Bidirule Package

Import path: golang.org/x/text/secure/bidirule

Implements the Bidi Rule defined by RFC 5893, used for validating internationalized domain names and other bidirectional text in security contexts.

NOTE: This package is under development and may change without preserving backward compatibility.

Functions

// Valid reports whether b conforms to the BiDi rule
func Valid(b []byte) bool

// ValidString reports whether s conforms to the BiDi rule
func ValidString(s string) bool

// Direction reports the direction of the given label as defined by RFC 5893
func Direction(b []byte) bidi.Direction

// DirectionString reports the direction of the given label
func DirectionString(s string) bidi.Direction

Transformer Type

// Transformer verifies that input adheres to the Bidi Rule
type Transformer struct{}

func New() *Transformer

func (t *Transformer) Reset()
func (t *Transformer) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error)
func (t *Transformer) Span(src []byte, atEOF bool) (n int, err error)

Errors

// ErrInvalid indicates a label is invalid according to the Bidi Rule
var ErrInvalid error

Usage Examples

import (
    "golang.org/x/text/secure/bidirule"
    "golang.org/x/text/unicode/bidi"
)

// Validate domain label
valid := bidirule.ValidString("example")     // true
valid = bidirule.ValidString("مثال")        // true (Arabic, valid RTL)
valid = bidirule.ValidString("exam-مثال-ple") // false (mixed LTR/RTL)

// Get direction
dir := bidirule.DirectionString("example")  // bidi.LeftToRight
dir = bidirule.DirectionString("مثال")     // bidi.RightToLeft

// Validate bytes
valid = bidirule.Valid([]byte("example"))

// Use transformer for validation
t := bidirule.New()
import "golang.org/x/text/transform"

result, _, err := transform.String(t, "example")
if err == bidirule.ErrInvalid {
    // Invalid according to Bidi Rule
}

// Span to check how much is valid
n, err := t.Span([]byte("example"), true)
if err != nil {
    // First n bytes are valid
}

Common Patterns

Language-Sensitive Search and Replace

import (
    "golang.org/x/text/language"
    "golang.org/x/text/search"
)

// searchAndReplace returns a copy of text in which every match of pattern is
// replaced by replace. Matching uses a search.Matcher built for lang with the
// IgnoreCase and IgnoreDiacritics options, so a matched span does not have to
// be byte-identical to pattern.
//
// The second parameter is deliberately not called "search": that name would
// shadow the imported search package inside the function and the calls to
// search.New / search.IgnoreCase would not compile.
func searchAndReplace(text, pattern, replace string, lang language.Tag) string {
    m := search.New(lang, search.IgnoreCase, search.IgnoreDiacritics)

    // Accumulate into a byte slice; repeated string += would recopy the
    // whole result on every match.
    out := make([]byte, 0, len(text))
    pos := 0 // offset in text where the next search starts

    for {
        // start/end are relative to text[pos:], not to text.
        start, end := m.IndexString(text[pos:], pattern)
        if start < 0 || end <= start {
            // No more matches. A zero-length match is treated the same way:
            // it would never advance pos and the loop would not terminate.
            out = append(out, text[pos:]...)
            break
        }

        // Text before the match, then the replacement.
        out = append(out, text[pos:pos+start]...)
        out = append(out, replace...)
        // Move past the match.
        pos += end
    }

    return string(out)
}

// Usage
text := "Café and café"
result := searchAndReplace(text, "cafe", "coffee", language.English)
// "coffee and coffee"

Username Validation and Normalization

import (
    "errors"
    "golang.org/x/text/secure/precis"
)

// validateUsername enforces the PRECIS UsernameCaseMapped profile on username
// and then applies a length policy of 3 to 32 characters.
//
// It returns the enforced (normalized) username, or an empty string and an
// error if enforcement fails or the length is out of range.
func validateUsername(username string) (string, error) {
    // Normalize username
    normalized, err := precis.UsernameCaseMapped.String(username)
    if err != nil {
        return "", err
    }

    // Additional validation. Count code points rather than bytes: len() on a
    // string is its UTF-8 byte length, which would let a single 3-byte
    // character pass the minimum and reject short non-ASCII names at the
    // maximum.
    length := len([]rune(normalized))
    if length < 3 {
        return "", errors.New("username too short")
    }
    if length > 32 {
        return "", errors.New("username too long")
    }

    return normalized, nil
}

// Usage
username, err := validateUsername("Alice@Example.Com")
// username = "alice@example.com"

Password Validation

import (
    "errors"
    "golang.org/x/text/secure/precis"
)

// validatePassword checks password against the PRECIS OpaqueString profile
// and a minimum length, returning nil when both checks pass.
func validatePassword(password string) error {
    // Only the enforcement error matters here; the enforced string is unused.
    if _, err := precis.OpaqueString.String(password); err != nil {
        return err
    }

    // Minimum length, measured in bytes on the input as given.
    const minLen = 8
    if len(password) < minLen {
        return errors.New("password too short")
    }

    // Check for required character types
    // (implementation depends on requirements)

    return nil
}

Case-Insensitive Search with Highlighting

import (
    "golang.org/x/text/language"
    "golang.org/x/text/search"
)

// Match describes one occurrence of a pattern inside a searched text.
type Match struct {
    // Start and End are the byte offsets of the occurrence in the text,
    // with End exclusive.
    Start, End int
    // Text is the matched slice of the original text.
    Text string
}

// findAllMatches returns every non-overlapping, case-insensitive occurrence
// of pattern in text, in order of appearance. Offsets in the returned Match
// values are byte offsets into text. The result is nil when nothing matches.
func findAllMatches(text, pattern string, lang language.Tag) []Match {
    m := search.New(lang, search.IgnoreCase)
    // Compile once; the same pattern is searched repeatedly below.
    compiled := m.CompileString(pattern)

    var matches []Match

    offset := 0 // position in text where the next search starts
    for {
        // start/end are relative to text[offset:].
        start, end := compiled.IndexString(text[offset:])
        if start < 0 || end <= start {
            // Stop on "not found". Also stop on a zero-length match, which
            // would never advance offset and would append matches forever.
            break
        }

        matches = append(matches, Match{
            Start: offset + start,
            End:   offset + end,
            Text:  text[offset+start : offset+end],
        })

        // Resume right after this match so occurrences do not overlap.
        offset += end
    }

    return matches
}

// highlightMatches wraps every occurrence of pattern found by findAllMatches
// in <mark>...</mark> and returns the resulting string. When there are no
// occurrences, text is returned unchanged.
//
// NOTE(review): text is copied into the output verbatim, without HTML
// escaping. Escape untrusted input before rendering the result as HTML.
func highlightMatches(text, pattern string, lang language.Tag) string {
    found := findAllMatches(text, pattern, lang)

    out := ""
    cursor := 0 // end of the previous match within text

    for i := range found {
        hit := found[i]
        // Unmatched text since the previous hit, then the marked-up hit.
        out += text[cursor:hit.Start]
        out += "<mark>" + hit.Text + "</mark>"
        cursor = hit.End
    }

    // Trailing text after the last hit (the whole text if nothing matched).
    return out + text[cursor:]
}

Internationalized Domain Name Validation

import (
    "strings"
    "golang.org/x/text/secure/bidirule"
    "golang.org/x/text/secure/precis"
)

// validateIDN splits domain on "." and, for each label, enforces the PRECIS
// UsernameCaseMapped profile and then checks the Bidi Rule. It returns the
// enforced labels re-joined with ".".
//
// On failure it returns an empty string and either the PRECIS enforcement
// error or bidirule.ErrInvalid. Returning the package sentinel instead of an
// ad-hoc errors.New value lets callers test for it by comparison, and avoids
// depending on the "errors" package, which this snippet does not import.
func validateIDN(domain string) (string, error) {
    // Split into labels
    labels := strings.Split(domain, ".")

    normalizedLabels := make([]string, len(labels))

    for i, label := range labels {
        // Apply PRECIS UsernameCaseMapped
        normalized, err := precis.UsernameCaseMapped.String(label)
        if err != nil {
            return "", err
        }

        // Validate Bidi Rule
        if !bidirule.ValidString(normalized) {
            return "", bidirule.ErrInvalid
        }

        normalizedLabels[i] = normalized
    }

    return strings.Join(normalizedLabels, "."), nil
}

// Usage
domain, err := validateIDN("example.مثال")

Secure String Comparison

import (
    "crypto/subtle"
    "golang.org/x/text/secure/precis"
)

// secureCompareUsernames reports whether a and b yield the same
// UsernameCaseMapped comparison key. If a key cannot be derived for either
// input, the result is false.
func secureCompareUsernames(a, b string) bool {
    keyA, errA := precis.UsernameCaseMapped.CompareKey(a)
    if errA != nil {
        return false
    }

    keyB, errB := precis.UsernameCaseMapped.CompareKey(b)
    if errB != nil {
        return false
    }

    // Compare the keys with crypto/subtle rather than ==.
    same := subtle.ConstantTimeCompare([]byte(keyA), []byte(keyB))
    return same == 1
}

Search with Language Fallback

import (
    "golang.org/x/text/language"
    "golang.org/x/text/search"
)

// searchWithFallback looks for pattern in text, ignoring case. It first uses
// a matcher for preferredLang; if that finds nothing, it returns whatever a
// matcher for language.Und finds.
func searchWithFallback(text, pattern string, preferredLang language.Tag) (start, end int) {
    // Preferred language first.
    primary := search.New(preferredLang, search.IgnoreCase)
    if s, e := primary.IndexString(text, pattern); s != -1 {
        return s, e
    }

    // Nothing found: retry with the undetermined-language matcher and return
    // its result as-is.
    fallback := search.New(language.Und, search.IgnoreCase)
    return fallback.IndexString(text, pattern)
}

Building a Search Index

import (
    "golang.org/x/text/language"
    "golang.org/x/text/search"
    "golang.org/x/text/secure/precis"
)

// SearchIndex is an in-memory collection of documents keyed by id, together
// with a normalized copy of each document that searches run against.
type SearchIndex struct {
    // documents maps id -> content as it was added.
    documents map[string]string
    // normalized maps id -> normalized content.
    normalized map[string]string
    // lang selects the matcher used when searching.
    lang language.Tag
}

// NewSearchIndex returns an empty SearchIndex for lang with both of its maps
// allocated, so documents can be added right away.
func NewSearchIndex(lang language.Tag) *SearchIndex {
    idx := new(SearchIndex)
    idx.lang = lang
    idx.documents = map[string]string{}
    idx.normalized = map[string]string{}
    return idx
}

// Add stores content under id, replacing any previous entry with that id.
// A normalized copy (Freeform profile with IgnoreCase and FoldWidth) is kept
// for searching; if normalization fails, the content itself is used instead.
// The returned error is always nil.
func (idx *SearchIndex) Add(id, content string) error {
    profile := precis.NewFreeform(precis.IgnoreCase, precis.FoldWidth)

    // Start from the raw content and upgrade to the normalized form only
    // when the profile accepts it.
    searchable := content
    if folded, err := profile.String(content); err == nil {
        searchable = folded
    }

    idx.documents[id] = content
    idx.normalized[id] = searchable

    return nil
}

// Search returns the ids of all documents whose normalized content contains
// query under a Loose matcher for the index language. The result is nil when
// nothing matches. Ids come from ranging over a map, so their order is not
// stable between calls.
func (idx *SearchIndex) Search(query string) []string {
    // Normalize the query the same way Add normalizes content.
    profile := precis.NewFreeform(precis.IgnoreCase, precis.FoldWidth)
    normalizedQuery, err := profile.String(query)
    if err != nil {
        // Mirror Add: fall back to the raw text when the profile rejects it,
        // instead of silently searching with whatever String returned.
        normalizedQuery = query
    }

    m := search.New(idx.lang, search.Loose)

    var results []string
    for id, content := range idx.normalized {
        start, _ := m.IndexString(content, normalizedQuery)
        if start != -1 {
            results = append(results, id)
        }
    }

    return results
}

Version Information

Based on:

  • CLDR 23
  • Unicode 15.0.0 (for PRECIS)
  • Unicode 6.2.0 (for search)
  • RFC 8264 (PRECIS Framework)
  • RFC 8265 (PRECIS Profiles for Usernames and Passwords)
  • RFC 8266 (PRECIS Profile for Nicknames)
  • RFC 5893 (Bidi Rule for IDNA)