Tokenizes a string that represents a regular expression.
—
Predefined character set utilities that generate common regex character class tokens. These functions create structured token representations for standard character classes like digits, words, and whitespace.
Creates character sets for word characters (letters, digits, and underscore).
/**
* Creates a character set token for word characters (\w equivalent)
* Includes: a-z, A-Z, 0-9, and underscore (_)
* @returns Set token representing [a-zA-Z0-9_]
*/
function words(): Set;
/**
* Creates a negated character set token for non-word characters (\W equivalent)
* Matches any character except: a-z, A-Z, 0-9, and underscore (_)
* @returns Set token representing [^a-zA-Z0-9_]
*/
function notWords(): Set;

Usage Examples:
import { words, notWords, reconstruct } from "ret";
// Generate word character set
const wordSet = words();
// Result: { type: types.SET, set: [...], not: false }
// Generate non-word character set
const nonWordSet = notWords();
// Result: { type: types.SET, set: [...], not: true }
// Reconstruct to regex strings
reconstruct(wordSet); // "\\w"
reconstruct(nonWordSet); // "\\W"

Creates character sets for numeric digits.
/**
* Creates a character set token for digit characters (\d equivalent)
* Includes: 0-9
* @returns Set token representing [0-9]
*/
function ints(): Set;
/**
* Creates a negated character set token for non-digit characters (\D equivalent)
* Matches any character except: 0-9
* @returns Set token representing [^0-9]
*/
function notInts(): Set;

Usage Examples:
import { ints, notInts, reconstruct } from "ret";
// Generate digit character set
const digitSet = ints();
// Result: { type: types.SET, set: [{ type: types.RANGE, from: 48, to: 57 }], not: false }
// Generate non-digit character set
const nonDigitSet = notInts();
// Result: { type: types.SET, set: [{ type: types.RANGE, from: 48, to: 57 }], not: true }
// Reconstruct to regex strings
reconstruct(digitSet); // "\\d"
reconstruct(nonDigitSet); // "\\D"

Creates character sets for whitespace characters.
/**
* Creates a character set token for whitespace characters (\s equivalent)
* Includes: space, tab, newline, carriage return, form feed, vertical tab, and Unicode whitespace
* @returns Set token representing whitespace characters
*/
function whitespace(): Set;
/**
* Creates a negated character set token for non-whitespace characters (\S equivalent)
* Matches any character except whitespace characters
* @returns Set token representing non-whitespace characters
*/
function notWhitespace(): Set;

Usage Examples:
import { whitespace, notWhitespace, reconstruct } from "ret";
// Generate whitespace character set
const spaceSet = whitespace();
// Result: { type: types.SET, set: [...extensive whitespace chars...], not: false }
// Generate non-whitespace character set
const nonSpaceSet = notWhitespace();
// Result: { type: types.SET, set: [...extensive whitespace chars...], not: true }
// Reconstruct to regex strings
reconstruct(spaceSet); // "\\s"
reconstruct(nonSpaceSet); // "\\S"

Creates a character set representing the dot (.) metacharacter.
/**
* Creates a character set token for any character except line terminators (. equivalent)
* Matches any character except: \n, \r, \u2028 (line separator), \u2029 (paragraph separator)
* @returns Set token representing any character except line terminators
*/
function anyChar(): Set;

Usage Examples:
import { anyChar, reconstruct } from "ret";
// Generate any-character set
const anySet = anyChar();
// Result: { type: types.SET, set: [line terminator chars], not: true }
// Reconstruct to regex string
reconstruct(anySet); // "."

All character set functions return Set tokens with the following structure:
interface Set {
type: types.SET;
set: SetTokens; // Array of characters and ranges
not: boolean; // Whether the set is negated
}
// SetTokens contain individual characters, character ranges, or nested sets
type SetTokens = (Range | Char | Set)[];
interface Range {
type: types.RANGE;
from: number; // Start character code
to: number; // End character code
}
interface Char {
type: types.CHAR;
value: number; // Character code
}

import { tokenizer, words, ints, reconstruct, types } from "ret";
// Create a pattern that matches word characters followed by digits
const customPattern = {
type: types.ROOT,
stack: [
{ type: types.REPETITION, min: 1, max: Infinity, value: words() },
{ type: types.REPETITION, min: 1, max: Infinity, value: ints() }
]
};
reconstruct(customPattern); // "\\w+\\d+"

import { tokenizer, words, notWords } from "ret";
// Parse a regex and identify if it uses standard character classes
const tokens = tokenizer("\\w+@\\w+\\.\\w+");
// This would parse to tokens using words() sets for \\w patterns

import { words, ints, whitespace } from "ret";
// These character sets can be composed into more complex patterns
// or used individually in token construction for regex generation
const wordChars = words().set; // Get the underlying character/range array
const digitChars = ints().set; // Get digit character ranges
const spaceChars = whitespace().set; // Get whitespace character definitions

Install with Tessl CLI
npx tessl i tessl/npm-ret