or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

docs

byte-order-operations.md
channel-factories.md
channel-interfaces.md
channel-operations.md
character-encoding.md
index.md
object-pooling.md
tile.json

docs/character-encoding.md

Character Encoding

Character encoding and decoding support with charset management for text processing operations in byte channels.

Capabilities

Charset Management

Core charset handling and registry for text encoding operations.

/**
 * Base class for character sets.
 *
 * Declared with `expect`, so the actual implementation is supplied per platform.
 * Instances are obtained from [Charsets]: either one of its predefined constants
 * or a lookup by name.
 */
expect abstract class Charset

/**
 * Registry of supported character sets.
 *
 * Exposes the charsets that are available as predefined constants. Other charsets
 * are looked up by name through the `isSupported` / `forName` extensions.
 */
expect object Charsets {
    /** UTF-8 character set. */
    val UTF_8: Charset
    
    /** ISO 8859-1 (Latin-1) character set. */
    val ISO_8859_1: Charset
}

/**
 * Check if a charset name is supported.
 *
 * @param name charset name to test, for example `"UTF-16"` or `"windows-1252"`
 * @return `true` if a charset with this name is supported, `false` otherwise
 */
expect fun Charsets.isSupported(name: String): Boolean

/**
 * Get charset by name.
 *
 * NOTE(review): behavior for an unsupported name is not visible here; presumably
 * the call fails, so guard with `Charsets.isSupported(name)` when the charset may
 * be unavailable. TODO confirm.
 *
 * @param name charset name, for example `"US-ASCII"`
 * @return the [Charset] registered under [name]
 */
expect fun Charsets.forName(name: String): Charset

/**
 * Get the name of a charset.
 *
 * Read-only extension property on [Charset].
 */
expect val Charset.name: String

Usage Examples:

import io.ktor.utils.io.charsets.*

/**
 * Demonstrates the ways of obtaining a charset: predefined constants,
 * lookup by name, and lookup guarded by an availability check.
 */
fun charsetExamples() {
    // Predefined constants need no lookup
    val utf8Charset = Charsets.UTF_8
    val latin1Charset = Charsets.ISO_8859_1

    // Look a charset up by name only after confirming that it is supported
    if (Charsets.isSupported("UTF-16")) {
        val utf16Charset = Charsets.forName("UTF-16")
        println("UTF-16 is supported")
    }

    // Direct lookup by a common charset name
    val asciiCharset = Charsets.forName("US-ASCII")

    // Optional charset: stays null when the name is not supported
    val windows1252Charset = "windows-1252"
        .takeIf { Charsets.isSupported(it) }
        ?.let { Charsets.forName(it) }
}

Charset Encoders and Decoders

Character encoding and decoding implementations for converting between text and bytes.

/**
 * Encodes character sequences to byte sequences.
 *
 * Declared with `expect`, so the actual implementation is supplied per platform.
 * The usage example obtains an instance with `charset.newEncoder()`.
 */
expect abstract class CharsetEncoder

/**
 * Decodes byte sequences to character sequences.
 *
 * Declared with `expect`, so the actual implementation is supplied per platform.
 * The usage example obtains an instance with `charset.newDecoder()`.
 */
expect abstract class CharsetDecoder

/**
 * Encode a character sequence to a [Source].
 *
 * @param input text to encode
 * @return a [Source] from which the encoded bytes can be read
 */
fun CharsetEncoder.encode(input: CharSequence): Source

/**
 * Decode bytes from a [Source] with maximum character limit.
 *
 * NOTE(review): what happens when [input] holds more than [max] characters
 * (stop early vs. fail) is not visible here. TODO confirm.
 *
 * @param input source of the encoded bytes
 * @param max maximum number of characters to decode
 * @return the decoded text
 */
fun CharsetDecoder.decode(input: Source, max: Int): String

Usage Examples:

import io.ktor.utils.io.charsets.*
import kotlinx.io.*

/**
 * Demonstrates a text -> bytes -> text round trip with a UTF-8 encoder/decoder
 * pair, followed by encoding with a different charset (ISO 8859-1).
 */
fun encodingExamples() {
    val utf8 = Charsets.UTF_8

    // Encoder and decoder are both created from the same charset
    val utf8Encoder = utf8.newEncoder()
    val utf8Decoder = utf8.newDecoder()

    // Text -> bytes
    val sample = "Hello, 世界!"
    val encodedBytes = utf8Encoder.encode(sample)

    // Bytes -> text, with a limit of 1000 characters
    val roundTripped = utf8Decoder.decode(encodedBytes, max = 1000)
    println(roundTripped) // "Hello, 世界!"

    // The same API with another charset
    val latin1Sample = "Café"
    val latin1Bytes = Charsets.ISO_8859_1.newEncoder().encode(latin1Sample)
}

Channel Integration

Character encoding operations integrated with byte channels.

/**
 * Create a [ByteReadChannel] from text with specific charset.
 *
 * @param text content the channel is created from
 * @param charset charset applied to [text]; defaults to [Charsets.UTF_8]
 * @return a read channel over [text]
 */
fun ByteReadChannel(text: String, charset: Charset = Charsets.UTF_8): ByteReadChannel

/**
 * Write a string using UTF-8 encoding.
 *
 * Suspending function: must be called from a coroutine or another suspending function.
 *
 * @param value text to write to this channel
 */
suspend fun ByteWriteChannel.writeStringUtf8(value: String)

/**
 * Write a string using platform default encoding.
 *
 * Suspending function: must be called from a coroutine or another suspending function.
 *
 * NOTE(review): the signature takes no charset, so the encoding cannot be chosen
 * by the caller; confirm against the Ktor API reference that it really is the
 * platform default. Prefer `writeStringUtf8` when the encoding must be explicit.
 *
 * @param value text to write to this channel
 */
suspend fun ByteWriteChannel.writeString(value: String)

/**
 * Read a UTF-8 line from the channel.
 *
 * Suspending function: must be called from a coroutine or another suspending function.
 *
 * NOTE(review): presumably throws [TooLongLineException] when the line exceeds
 * [max]. TODO confirm.
 *
 * @param max maximum line length; defaults to [Int.MAX_VALUE]
 * @return the line that was read, or `null` when no line could be read
 */
suspend fun ByteReadChannel.readUTF8Line(max: Int = Int.MAX_VALUE): String?

/**
 * Read a UTF-8 line into an [Appendable].
 *
 * Suspending function: must be called from a coroutine or another suspending function.
 *
 * NOTE(review): the usage example loops while the result is `true`, so `true`
 * presumably means a line was appended and `false` that nothing more could be
 * read. TODO confirm.
 *
 * @param out destination the line is appended to
 * @param max maximum line length; defaults to [Int.MAX_VALUE]
 * @return whether a line was read (see the note above)
 */
suspend fun ByteReadChannel.readUTF8LineTo(
    out: Appendable, 
    max: Int = Int.MAX_VALUE
): Boolean

/**
 * Read a UTF-8 line with specific line ending mode.
 *
 * Suspending function: must be called from a coroutine or another suspending function.
 * Unlike the two-parameter overload, [max] has no default here.
 *
 * NOTE(review): the usage example loops while the result is `true`, so `true`
 * presumably means a line was appended and `false` that nothing more could be
 * read. TODO confirm.
 *
 * @param out destination the line is appended to
 * @param max maximum line length
 * @param lineEnding line ending mode(s) to use, e.g. `LineEndingMode.Any`
 * @return whether a line was read (see the note above)
 */
suspend fun ByteReadChannel.readUTF8LineTo(
    out: Appendable, 
    max: Int, 
    lineEnding: LineEndingMode
): Boolean

Usage Examples:

import io.ktor.utils.io.*
import io.ktor.utils.io.charsets.*

/**
 * Demonstrates charset-aware channel usage: creating read channels from text,
 * writing strings, and reading UTF-8 lines either one at a time or into a
 * single buffer.
 */
suspend fun channelEncodingExamples() {
    // Read channels built from text with an explicit charset
    val utf8Source = ByteReadChannel("Hello, UTF-8!", Charsets.UTF_8)
    val latin1Source = ByteReadChannel("Café", Charsets.ISO_8859_1)

    // Writing strings to a channel
    val sink = ByteChannel()
    sink.writeStringUtf8("UTF-8 text: 你好")
    sink.writeString("Platform encoding text")

    // Line-by-line reading; a null line ends the loop
    val lineSource = ByteReadChannel("Line 1\nLine 2\nLine 3")
    while (!lineSource.isClosedForRead) {
        val current = lineSource.readUTF8Line() ?: break
        println("Read: $current")
    }

    // Collect every line into one buffer, terminating each with "\n"
    val multiLineSource = ByteReadChannel("Multi\nline\ntext")
    val collected = buildString {
        while (multiLineSource.readUTF8LineTo(this)) {
            append("\n")
        }
    }
    println("All text: $collected")
}

Line Ending Support

Support for different line ending modes in text processing.

/**
 * Represents different line ending modes for text processing.
 *
 * The predefined modes live in the companion object. Modes can be combined with `+`
 * and tested for membership with `in`; a mode is passed to
 * `ByteReadChannel.readUTF8LineTo` through its `lineEnding` parameter.
 */
value class LineEndingMode {
    companion object {
        /** Carriage return (\r) */
        val CR: LineEndingMode
        
        /** Line feed (\n) */
        val LF: LineEndingMode
        
        /** Carriage return + line feed (\r\n) */
        val CRLF: LineEndingMode
        
        /** Any line ending mode */
        val Any: LineEndingMode
    }
    
    /**
     * Check if this mode contains another mode.
     *
     * Being an `operator`, it backs the `other in this` syntax,
     * e.g. `LineEndingMode.CR in LineEndingMode.Any`.
     *
     * @param other mode to look for in this mode
     * @return `true` if this mode contains [other]
     */
    operator fun contains(other: LineEndingMode): Boolean
    
    /**
     * Combine line ending modes.
     *
     * Being an `operator`, it backs the `this + other` syntax,
     * e.g. `LineEndingMode.CR + LineEndingMode.LF`.
     *
     * @param other mode to combine with this one
     * @return the combined mode
     */
    operator fun plus(other: LineEndingMode): LineEndingMode
}

Usage Examples:

import io.ktor.utils.io.*

/**
 * Demonstrates reading lines with an explicit [LineEndingMode], and the
 * `in` / `+` operators defined on modes.
 */
suspend fun lineEndingExamples() {
    // Input that mixes CRLF, LF and CR terminators
    val source = ByteReadChannel("Line 1\r\nLine 2\nLine 3\rLine 4")

    // Read with LineEndingMode.Any, appending " | " after every line read
    val joined = StringBuilder()
    while (true) {
        val gotLine = source.readUTF8LineTo(joined, max = 1000, lineEnding = LineEndingMode.Any)
        if (!gotLine) break
        joined.append(" | ")
    }
    println("Lines: $joined")

    // Modes are ordinary values and can be held in locals
    val crlfOnly = LineEndingMode.CRLF
    val everyEnding = LineEndingMode.Any

    // Membership test through `in` (operator contains)
    if (LineEndingMode.CR in everyEnding) println("Any mode contains CR")

    // Combination through `+` (operator plus)
    val crPlusLf = LineEndingMode.CR + LineEndingMode.LF
    println("Combined mode: $crPlusLf")
}

Exception Types

Exceptions related to character encoding operations.

/**
 * Exception thrown when input contains malformed character sequences.
 *
 * Declared with `expect`, so the actual class is supplied per platform, and `open`,
 * so it can be subclassed.
 */
expect open class MalformedInputException : Exception

/**
 * Exception thrown when a line exceeds maximum length.
 *
 * @param message detail message, passed through to [Exception]
 */
class TooLongLineException(message: String) : Exception(message)

Usage Examples:

import io.ktor.utils.io.*
import io.ktor.utils.io.charsets.*

/**
 * Demonstrates catching the two encoding-related exceptions:
 * [TooLongLineException] for line-length limits and
 * [MalformedInputException] for decoding failures.
 */
suspend fun exceptionHandlingExamples() {
    // Line-length limit: the read may throw TooLongLineException when the
    // line is longer than the given max
    try {
        val source = ByteReadChannel("Very long line without breaks...")
        source.readUTF8Line(max = 10)
    } catch (tooLong: TooLongLineException) {
        println("Line too long: ${tooLong.message}")
    }

    // Malformed input: how it is reported depends on the charset implementation
    try {
        val utf8Decoder = Charsets.UTF_8.newDecoder()
        // ... decoding operations that might fail
    } catch (malformed: MalformedInputException) {
        println("Malformed input: ${malformed.message}")
    }
}

Character Set Examples

Common Charsets

// Predefined constant: UTF-8 (default for most operations)
val utf8: Charset = Charsets.UTF_8

// Predefined constant: Latin-1 (ISO 8859-1)
val latin1: Charset = Charsets.ISO_8859_1

// Platform-specific charsets, resolved by name at the point of declaration
val ascii: Charset = Charsets.forName("US-ASCII")
val utf16: Charset = Charsets.forName("UTF-16")
val windows1252: Charset = Charsets.forName("windows-1252")

Text Processing Patterns

// Create channels with specific encoding
val channel = ByteReadChannel(text, Charsets.UTF_8)

// Process line by line. readUTF8Line returns a nullable String; a null result
// means no line could be read, so leave the loop at that point instead of
// relying solely on isClosedForRead to end it.
while (!channel.isClosedForRead) {
    val line = channel.readUTF8Line(max = 8192) ?: break
    processLine(line)
}

// Write with encoding
writeChannel.writeStringUtf8("UTF-8 content")

Error Handling Patterns

// Check charset support. `if` is an expression, so the selected charset (or the
// UTF-8 fallback) is bound once and stays in scope for the code that follows,
// rather than being declared separately inside each branch.
val charset = if (Charsets.isSupported("CHARSET_NAME")) {
    Charsets.forName("CHARSET_NAME")
} else {
    // Fallback to UTF-8
    Charsets.UTF_8
}
// Use charset

// Handle line length limits
try {
    val line = channel.readUTF8Line(max = maxLineLength)
} catch (e: TooLongLineException) {
    // Handle or skip long lines
}