JavaScript library for creating language models with next-token prediction capabilities including autocomplete, text completion, and AI-powered text generation.
The Vector Operations system provides internal fixed-dimension vector functionality for embedding representations and similarity calculations. The Vector class extends JavaScript's native Array with validation and utility methods specifically designed for high-dimensional token embeddings. This is an internal component not directly exported from the main package.
A specialized Array extension that enforces fixed dimensions and provides vector-specific functionality.
/**
* Vector class extending Array with fixed dimensions
* @extends Array
*/
class Vector extends Array {
/**
* Create zero-filled vector with default dimensions
* @returns {Vector} New vector filled with zeros
*/
static fromNull();
/**
* Create vector with length validation
* @param {...*} arguments - Array constructor arguments
* @throws {string} "RangeError: Invalid vector length." if length doesn't match DIMENSIONS
*/
constructor(...arguments);
}Usage Examples:
const Vector = require('next-token-prediction/components/Vector');
// Create zero vector
const zeroVector = Vector.fromNull();
console.log(zeroVector.length); // 144 (default DIMENSIONS)
console.log(zeroVector[0]); // 0
// Create vector with specific values (must match DIMENSIONS)
const customVector = new Vector(...new Array(144).fill(0.5));
// This would throw an error (wrong length)
try {
const invalidVector = new Vector(1, 2, 3); // Only 3 elements, needs 144
} catch (error) {
console.log(error); // "RangeError: Invalid vector length."
}
Internal utility functions for vector operations used throughout the prediction system.
/**
* Calculate dot product of two vectors
* @param {Vector|number[]} vectorA - First vector (defaults to null vector)
* @param {Vector|number[]} vectorB - Second vector (defaults to null vector)
* @returns {number} Dot product result
*/
function dotProduct(vectorA = Vector.fromNull(), vectorB = Vector.fromNull());
Usage Examples:
// Calculate similarity between token embeddings
const embedding1 = Vector.fromNull();
const embedding2 = Vector.fromNull();
// Set some values for demonstration
embedding1[0] = 0.8;
embedding1[1] = 0.6;
embedding2[0] = 0.6;
embedding2[1] = 0.8;
const similarity = dotProduct(embedding1, embedding2);
console.log('Similarity score:', similarity); // 0.96 (0.8*0.6 + 0.6*0.8)
// Use in token similarity comparison
const tokenEmbedding = embeddingSearch('hello', 'world');
const candidateEmbedding = embeddingSearch('hello', 'there');
const score = dotProduct(tokenEmbedding, candidateEmbedding);
The vector system is configured through environment variables:
/**
* Vector configuration environment variables
*/
interface VectorConfig {
DIMENSIONS: number; // Vector dimensionality (default: 144)
}
Configuration Examples:
# Standard configuration (default)
export DIMENSIONS=144
# Higher dimensional embeddings for more complex models
export DIMENSIONS=256
# Lower dimensional for faster processing/less memory
export DIMENSIONS=64
Vectors are used to represent token embeddings with specific meaning for each dimension:
/**
* Vector dimension allocation for token embeddings
*/
interface VectorDimensions {
// Character composition (positions 0-65): 66 dimensions
characterDistribution: {
startIndex: 0;
length: 66;
description: "Distribution of alphanumeric characters (A-Z, a-z, 0-9, #$%&)";
};
// Parts of speech (positions 66-101): 36 dimensions
partOfSpeech: {
startIndex: 66;
length: 36;
description: "Grammatical role indicators (CC, CD, DT, EX, FW, IN, JJ, etc.)";
};
// Token prevalence (position 102): 1 dimension
prevalence: {
startIndex: 102;
length: 1;
description: "Token frequency in training dataset (normalized 0-1)";
};
// Word suffixes (positions 103-139): 37 dimensions
suffixes: {
startIndex: 103;
length: 37;
description: "Common rhyme/ending patterns (ack, ail, ain, ake, etc.)";
};
// Next-word frequency (position 140): 1 dimension
nextWordFrequency: {
startIndex: 140;
length: 1;
description: "Normalized co-occurrence frequency";
};
// Content filtering (position 141): 1 dimension
vulgarity: {
startIndex: 141;
length: 1;
description: "Profanity detection (currently placeholder)";
};
// Style features (positions 142-143): 2 dimensions
style: {
startIndex: 142;
length: 2;
description: "Stylistic features [pirate, victorian]";
};
}
/**
* Example vector operations for token similarity
*/
interface VectorOperations {
// Get embedding for token pair
embeddingSearch(prevToken: string, token: string): Vector;
// Find similar tokens using vector similarity
getSimilarToken(prevToken: string, token: string): {
token: string;
rankedTokenList: string[];
};
// Calculate token relationship strength
dotProduct(vectorA: Vector, vectorB: Vector): number;
}
Advanced Usage Examples:
// Analyze token relationships
/**
 * Compare two tokens' embeddings within the same preceding context.
 * @param {string} token1 - First token to compare
 * @param {string} token2 - Second token to compare
 * @param {string} prevContext - Preceding token used as context for both embeddings
 * @returns {{similarity: number, isHighlySimilar: boolean, characterSimilarity: number, grammaticalSimilarity: number}}
 */
function analyzeTokenRelationship(token1, token2, prevContext) {
  const embedding1 = embeddingSearch(prevContext, token1);
  const embedding2 = embeddingSearch(prevContext, token2);
  const similarity = dotProduct(embedding1, embedding2);
  // NOTE: calling .slice() directly on a Vector would create the result via the
  // species constructor — `new Vector(sliceLength)` — and throw
  // "RangeError: Invalid vector length." because the slice is shorter than
  // DIMENSIONS. Copy to plain arrays first so slicing is safe.
  const chars1 = [...embedding1].slice(0, 66);
  const chars2 = [...embedding2].slice(0, 66);
  const pos1 = [...embedding1].slice(66, 102);
  const pos2 = [...embedding2].slice(66, 102);
  return {
    similarity,
    isHighlySimilar: similarity > 0.8,
    // Similarity over the character-composition dimensions (0-65)
    characterSimilarity: dotProduct(chars1, chars2),
    // Similarity over the part-of-speech dimensions (66-101)
    grammaticalSimilarity: dotProduct(pos1, pos2)
  };
}
// Custom vector creation for analysis
function createCustomEmbedding() {
const vector = Vector.fromNull();
// Set character distribution (first 66 dimensions)
vector[0] = 0.1; // 'A' frequency
vector[1] = 0.05; // 'B' frequency
// ... continue for all characters
// Set part-of-speech indicators (dimensions 66-101)
vector[66] = 1.0; // CC (coordinating conjunction)
vector[67] = 0.0; // CD (cardinal digit)
// ... continue for all POS tags
return vector;
}DIMENSIONS * 8 bytes (64-bit numbers)Vector.fromNull()// Reuse zero vectors instead of creating new ones
const sharedZero = Vector.fromNull();
// Cache frequently accessed embeddings
const embeddingCache = new Map();
/**
 * Memoized embedding lookup: each (prev, token) pair is computed at most once.
 * @param {string} prev - Preceding token
 * @param {string} token - Current token
 * @returns {Vector} Cached or freshly computed embedding
 */
function getCachedEmbedding(prev, token) {
  const cacheKey = `${prev}:${token}`;
  if (embeddingCache.has(cacheKey)) {
    return embeddingCache.get(cacheKey);
  }
  const embedding = embeddingSearch(prev, token);
  embeddingCache.set(cacheKey, embedding);
  return embedding;
}
// Batch similarity calculations for efficiency
function batchSimilarity(queryEmbedding, candidateEmbeddings) {
return candidateEmbeddings.map(candidate =>
dotProduct(queryEmbedding, candidate)
);
}The vector system provides clear error handling for common issues:
// Dimension validation error
try {
const wrongSize = new Vector(1, 2, 3); // Wrong number of elements
} catch (error) {
console.log(error); // "RangeError: Invalid vector length."
}
// Safe vector operations with defaults
const safeResult = dotProduct(
undefinedVector, // Will default to Vector.fromNull()
anotherVector
);
// Null vector fallback in embeddings
const embedding = embeddingSearch('unknown', 'token') || Vector.fromNull();
Install with Tessl CLI
npx tessl i tessl/npm-next-token-prediction