or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

core-utilities.md
data-structures.md
development-utilities.md
dom-styling.md
functional-programming.md
index.md
network-fetch.md
performance-crypto.md
text-unicode.md
user-agent-browser.md
tile.json

docs/text-unicode.md

Text & Unicode

International text processing utilities for Unicode handling, bidirectional text, CJK languages, and localization support.

Capabilities

Unicode Utilities

General Unicode text processing and character handling utilities.

const UnicodeUtils = require('fbjs/lib/UnicodeUtils');
const UnicodeUtilsExtra = require('fbjs/lib/UnicodeUtilsExtra');

/**
 * Documented signature of the `UnicodeUtils` module: surrogate detection,
 * code point conversion, display width, printability and normalization.
 *
 * NOTE(review): this is a signature listing only; no implementation is
 * visible here. Verify each member against the installed fbjs source before
 * relying on it.
 */
const UnicodeUtils: {
  /**
   * Checks whether a character belongs to a Unicode surrogate pair.
   * NOTE(review): unclear whether this tests for a complete pair or for a
   * single surrogate half — confirm against the implementation.
   * @param char - Character to check
   * @returns True if character is part of surrogate pair
   */
  isSurrogatePair(char: string): boolean;
  
  /**
   * Gets the Unicode code point for a character.
   * @param char - Character to get code point for
   * @returns Unicode code point number
   */
  getCodePoint(char: string): number;
  
  /**
   * Converts a code point to its character.
   * @param codePoint - Unicode code point
   * @returns Character string
   */
  fromCodePoint(codePoint: number): string;
  
  /**
   * Determines character width for display purposes.
   * @param char - Character to measure
   * @returns Width (1 for normal, 2 for wide characters)
   */
  getCharacterWidth(char: string): number;
  
  /**
   * Checks if a character is printable.
   * @param char - Character to check
   * @returns True if character is printable
   */
  isPrintable(char: string): boolean;
  
  /**
   * Normalizes a Unicode string using the specified form.
   * @param str - String to normalize
   * @param form - Optional normalization form (NFC, NFD, NFKC, NFKD); the
   *   form applied when omitted is not stated here — TODO confirm
   * @returns Normalized string
   */
  normalize(str: string, form?: string): string;
};

/**
 * Documented signature of the `UnicodeUtilsExtra` module: character
 * classification, word-boundary detection and grapheme segmentation.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const UnicodeUtilsExtra: {
  /**
   * Performs advanced Unicode character classification.
   * @param char - Character to classify
   * @returns Object with `category`, `script` and `block` strings plus the
   *   boolean flags `isLetter`, `isDigit`, `isWhitespace` and `isPunctuation`
   */
  classifyCharacter(char: string): {
    category: string,
    script: string,
    block: string,
    isLetter: boolean,
    isDigit: boolean,
    isWhitespace: boolean,
    isPunctuation: boolean
  };
  
  /**
   * Finds word boundaries in Unicode text.
   * @param text - Text to analyze
   * @returns Array of word boundary positions (presumably string indices
   *   into `text` — TODO confirm the unit)
   */
  findWordBoundaries(text: string): Array<number>;
  
  /**
   * Segments text into grapheme clusters.
   * @param text - Text to segment
   * @returns Array of grapheme clusters, one string per cluster
   */
  segmentGraphemes(text: string): Array<string>;
};

Bidirectional Text Support

Utilities for handling bidirectional (RTL/LTR) text layout and processing.

const UnicodeBidi = require('fbjs/lib/UnicodeBidi');
const UnicodeBidiDirection = require('fbjs/lib/UnicodeBidiDirection');
const UnicodeBidiService = require('fbjs/lib/UnicodeBidiService');

/**
 * Documented signature of the `UnicodeBidi` module: direction detection,
 * bidi processing and per-character directionality checks for RTL/LTR text.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const UnicodeBidi: {
  /**
   * Determines the text direction of a string.
   * @param text - Text to analyze
   * @returns Direction: 'ltr', 'rtl', or 'neutral'
   */
  getDirection(text: string): string;
  
  /**
   * Applies the Unicode Bidirectional Algorithm.
   * @param text - Text to process
   * @param direction - Base direction ('ltr' or 'rtl')
   * @returns Object with the reordered `text`, a `levels` array of numbers,
   *   and `runs`, each a `{start, end, level}` range (whether `end` is
   *   inclusive is not stated here — TODO confirm)
   */
  processText(text: string, direction: string): {
    text: string,
    levels: Array<number>,
    runs: Array<{start: number, end: number, level: number}>
  };
  
  /**
   * Checks if a character is strongly RTL.
   * @param char - Character to check
   * @returns True if character has strong RTL directionality
   */
  isRTLChar(char: string): boolean;
  
  /**
   * Checks if a character is strongly LTR.
   * @param char - Character to check
   * @returns True if character has strong LTR directionality
   */
  isLTRChar(char: string): boolean;
  
  /**
   * Gets the bidirectional character type.
   * @param char - Character to analyze
   * @returns Bidi type (L, R, AL, EN, ES, ET, AN, CS, NSM, BN, B, S, WS, ON)
   */
  getBidiType(char: string): string;
};

/**
 * Documented signature of the `UnicodeBidiDirection` module: direction
 * constants plus helpers that operate on direction strings.
 *
 * NOTE(review): upstream fbjs appears to define these constants in upper
 * case ('LTR', 'RTL', 'NEUTRAL') — confirm the casing against the installed
 * fbjs source before comparing against literal strings.
 */
const UnicodeBidiDirection: {
  // Left-to-right direction value.
  LTR: 'ltr',
  // Right-to-left direction value.
  RTL: 'rtl',
  // Value for text without a direction of its own.
  NEUTRAL: 'neutral',
  
  /**
   * Determines if a direction is RTL.
   * @param direction - Direction string
   * @returns True if direction is RTL
   */
  isRTL(direction: string): boolean;
  
  /**
   * Gets the opposite direction.
   * @param direction - Current direction
   * @returns Opposite direction (result for a neutral input is not stated
   *   here — TODO confirm)
   */
  getOpposite(direction: string): string;
};

/**
 * Documented signature of the `UnicodeBidiService` module: reordering of
 * mixed-direction text and mapping between logical and visual cursor
 * positions.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const UnicodeBidiService: {
  /**
   * Processes mixed RTL/LTR text for display.
   * @param text - Text with mixed directions
   * @param baseDirection - Base text direction
   * @returns Processed text with proper ordering
   */
  reorderText(text: string, baseDirection: string): string;
  
  /**
   * Maps a logical cursor position to its visual position in bidi text.
   * @param text - Bidirectional text
   * @param logicalPosition - Logical cursor position (presumably a string
   *   index into `text` — TODO confirm the unit)
   * @returns Visual cursor position
   */
  logicalToVisual(text: string, logicalPosition: number): number;
  
  /**
   * Maps a visual cursor position back to its logical position.
   * @param text - Bidirectional text
   * @param visualPosition - Visual cursor position
   * @returns Logical cursor position
   */
  visualToLogical(text: string, visualPosition: number): number;
};

Usage Examples:

const UnicodeBidi = require('fbjs/lib/UnicodeBidi');
const UnicodeBidiDirection = require('fbjs/lib/UnicodeBidiDirection');

// Ask for the overall direction of an Arabic phrase.
const arabicPhrase = 'مرحبا بالعالم';
const phraseDirection = UnicodeBidi.getDirection(arabicPhrase); // 'rtl'

// Run bidi processing over a sentence mixing Latin and Arabic words,
// with left-to-right as the base direction, and print the reordered text.
const bilingualSentence = 'Hello مرحبا World';
const bidiResult = UnicodeBidi.processText(bilingualSentence, 'ltr');
console.log('Reordered:', bidiResult.text);

// Test the strong directionality of individual characters.
const alefIsStrongRTL = UnicodeBidi.isRTLChar('ا'); // true (Arabic letter)
const latinAIsStrongLTR = UnicodeBidi.isLTRChar('A'); // true (Latin letter)

// Helpers that work on direction strings rather than on text.
const rtlIsRightToLeft = UnicodeBidiDirection.isRTL('rtl'); // true
const oppositeOfLTR = UnicodeBidiDirection.getOpposite('ltr'); // 'rtl'

CJK Language Support

Specialized utilities for Chinese, Japanese, and Korean text processing.

const UnicodeCJK = require('fbjs/lib/UnicodeCJK');
const UnicodeHangulKorean = require('fbjs/lib/UnicodeHangulKorean');

/**
 * Documented signature of the `UnicodeCJK` module: script detection, kana
 * conversion, segmentation and line-breaking checks for Chinese, Japanese
 * and Korean text.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const UnicodeCJK: {
  /**
   * Checks if a character is a CJK ideograph.
   * @param char - Character to check
   * @returns True if character is CJK ideograph
   */
  isCJKIdeograph(char: string): boolean;
  
  /**
   * Checks if a character is Hiragana.
   * @param char - Character to check
   * @returns True if character is Hiragana
   */
  isHiragana(char: string): boolean;
  
  /**
   * Checks if a character is Katakana.
   * @param char - Character to check
   * @returns True if character is Katakana
   */
  isKatakana(char: string): boolean;
  
  /**
   * Checks if a character is Hangul.
   * @param char - Character to check
   * @returns True if character is Hangul
   */
  isHangul(char: string): boolean;
  
  /**
   * Converts between Hiragana and Katakana.
   * @param text - Text containing Japanese characters
   * @param toKatakana - True to convert to Katakana, false for Hiragana
   * @returns Converted text
   */
  convertKana(text: string, toKatakana: boolean): string;
  
  /**
   * Determines the script type of CJK text.
   * @param text - Text to analyze
   * @returns Script type: 'han', 'hiragana', 'katakana', 'hangul', 'mixed'
   *   (result for text containing no CJK characters is not stated here —
   *   TODO confirm)
   */
  getScript(text: string): string;
  
  /**
   * Breaks CJK text into logical segments.
   * @param text - CJK text to segment
   * @returns Array of text segments
   */
  segmentText(text: string): Array<string>;
  
  /**
   * Checks if text requires CJK-specific line breaking.
   * @param text - Text to analyze
   * @returns True if CJK line breaking rules should be applied
   */
  needsCJKLineBreaking(text: string): boolean;
};

/**
 * Documented signature of the `UnicodeHangulKorean` module: Jamo
 * composition and decomposition, romanization, normalization and sorting
 * of Korean Hangul text.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const UnicodeHangulKorean: {
  /**
   * Decomposes a Hangul syllable into its constituent Jamo.
   * @param syllable - Hangul syllable character
   * @returns Object with `initial` and `medial` Jamo and an optional
   *   `final` Jamo
   */
  decompose(syllable: string): {
    initial: string,
    medial: string,
    final?: string
  };
  
  /**
   * Composes Jamo into a Hangul syllable.
   * @param initial - Initial consonant Jamo
   * @param medial - Medial vowel Jamo
   * @param final - Optional final consonant Jamo
   * @returns Composed Hangul syllable
   */
  compose(initial: string, medial: string, final?: string): string;
  
  /**
   * Checks if a character is Hangul Jamo.
   * @param char - Character to check
   * @returns True if character is Hangul Jamo
   */
  isJamo(char: string): boolean;
  
  /**
   * Gets the romanization of Hangul text.
   * @param hangul - Hangul text to romanize
   * @param system - Optional romanization system ('revised', 'mccune',
   *   'yale'); the system used when omitted is not stated here — TODO confirm
   * @returns Romanized text
   */
  romanize(hangul: string, system?: string): string;
  
  /**
   * Normalizes Hangul text for comparison.
   * @param text - Hangul text to normalize
   * @returns Normalized text
   */
  normalize(text: string): string;
  
  /**
   * Sorts an array of Korean text using proper collation.
   * @param texts - Array of Korean text strings
   * @returns Sorted array (whether `texts` itself is mutated is not stated
   *   here — TODO confirm)
   */
  sort(texts: Array<string>): Array<string>;
};

Usage Examples:

const UnicodeCJK = require('fbjs/lib/UnicodeCJK');
const UnicodeHangulKorean = require('fbjs/lib/UnicodeHangulKorean');

// Sample words, one per script, reused by the calls below.
const chinese = '汉字';
const japanese = 'ひらがな';
const korean = '한글';

// Detect CJK character types. Every sample character is a single UTF-16
// code unit, so charAt(0) yields the complete first character.
console.log(UnicodeCJK.isCJKIdeograph(chinese.charAt(0))); // '汉' -> true
console.log(UnicodeCJK.isHiragana(japanese.charAt(0))); // 'ひ' -> true
console.log(UnicodeCJK.isHangul(korean.charAt(0))); // '한' -> true

// Convert between Japanese scripts
const katakana = UnicodeCJK.convertKana(japanese, true); // 'ヒラガナ'

// Korean Hangul processing
const syllable = korean.charAt(0); // '한'
const decomposed = UnicodeHangulKorean.decompose(syllable);
console.log(decomposed); // { initial: 'ㅎ', medial: 'ㅏ', final: 'ㄴ' }

const composed = UnicodeHangulKorean.compose('ㅎ', 'ㅏ', 'ㄴ'); // '한'
const romanized = UnicodeHangulKorean.romanize('안녕하세요'); // 'annyeonghaseyo'

Tokenization Support

Text tokenization utilities for internationalization.

const TokenizeUtil = require('fbjs/lib/TokenizeUtil');

/**
 * Documented signature of the `TokenizeUtil` module: punctuation pattern
 * lookup, tokenization and word-character checks.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const TokenizeUtil: {
  /**
   * Gets the punctuation regex pattern used for tokenization.
   * @returns Regular expression pattern as a string (not a RegExp object)
   */
  getPunctuation(): string;
  
  /**
   * Tokenizes text into words and punctuation.
   * @param text - Text to tokenize
   * @param locale - Optional locale for language-specific rules
   * @returns Array of tokens, each with a `type` ('word', 'punctuation' or
   *   'whitespace'), its `value`, and `start`/`end` positions (whether
   *   `end` is inclusive is not stated here — TODO confirm)
   */
  tokenize(text: string, locale?: string): Array<{
    type: 'word' | 'punctuation' | 'whitespace',
    value: string,
    start: number,
    end: number
  }>;
  
  /**
   * Checks if a character is a word character in the given locale.
   * @param char - Character to check
   * @param locale - Optional locale for character classification
   * @returns True if character is word character
   */
  isWordChar(char: string, locale?: string): boolean;
};

Locale Support

Localization and internationalization utilities.

const Locale = require('fbjs/lib/Locale');

/**
 * Documented signature of the `Locale` module: reading and setting the
 * current locale, and parsing, formatting and validating locale strings.
 *
 * NOTE(review): signature listing only; verify each member against the
 * installed fbjs source before relying on it.
 */
const Locale: {
  /**
   * Gets the current locale.
   * @returns Current locale string (e.g., 'en-US', 'ar-SA')
   */
  getCurrent(): string;
  
  /**
   * Sets the current locale.
   * @param locale - Locale string to set
   */
  setCurrent(locale: string): void;
  
  /**
   * Checks if a locale is right-to-left.
   * @param locale - Locale string to check
   * @returns True if locale uses RTL writing direction
   */
  isRTL(locale: string): boolean;
  
  /**
   * Gets the language code from a locale.
   * @param locale - Full locale string
   * @returns Language code (e.g., 'en' from 'en-US')
   */
  getLanguage(locale: string): string;
  
  /**
   * Gets the region code from a locale.
   * @param locale - Full locale string
   * @returns Region code (e.g., 'US' from 'en-US'); the result for a locale
   *   without a region part is not stated here — TODO confirm
   */
  getRegion(locale: string): string;
  
  /**
   * Formats a locale string from its parts.
   * @param language - Language code
   * @param region - Optional region code
   * @returns Formatted locale string
   */
  format(language: string, region?: string): string;
  
  /**
   * Gets the supported locales.
   * @returns Array of supported locale strings
   */
  getSupported(): Array<string>;
  
  /**
   * Validates the format of a locale string.
   * @param locale - Locale string to validate
   * @returns True if locale format is valid
   */
  isValid(locale: string): boolean;
};

Usage Examples:

const Locale = require('fbjs/lib/Locale');
const TokenizeUtil = require('fbjs/lib/TokenizeUtil');

// Query the active locale, then inspect individual locale tags.
const activeLocale = Locale.getCurrent(); // e.g., 'en-US'
const arabicIsRightToLeft = Locale.isRTL('ar-SA'); // true
const chineseLanguageCode = Locale.getLanguage('zh-CN'); // 'zh'
const chineseRegionCode = Locale.getRegion('zh-CN'); // 'CN'

// Fetch the punctuation pattern and split a sentence into tokens.
const punctuationPattern = TokenizeUtil.getPunctuation();
const sentenceTokens = TokenizeUtil.tokenize('Hello, world!');
// Each token object carries `type`, `value`, `start` and `end`.