CtrlK
BlogDocsLog inGet started
Tessl Logo

tessl/maven-io-ktor--ktor-io-watchosarm64

Asynchronous I/O library for Kotlin multiplatform providing channels, streams, and byte manipulation utilities optimized for watchOS ARM64

Pending
Overview
Eval results
Files

docs/character-encoding.md

Character Encoding

Character encoding and decoding support with UTF-8 and ISO-8859-1 charsets, providing encoder/decoder abstractions for text processing.

Capabilities

Charset Classes

Abstract character set representations with encoder and decoder factory methods.

/**
 * Abstract character set representation.
 *
 * Each platform supplies its own implementation; this listing shows only the common surface.
 * A charset is a factory: it hands out [CharsetEncoder] and [CharsetDecoder] instances rather
 * than encoding or decoding itself. Instances are obtained through [forName] or through the
 * predefined constants on the `Charsets` object.
 */
abstract class Charset {
    /** Name of the character set (e.g., "UTF-8", "ISO-8859-1"). */
    abstract val name: String
    
    /**
     * Create a new encoder for this charset.
     * Encoders convert character sequences to byte sequences.
     *
     * @return new [CharsetEncoder] instance
     */
    abstract fun newEncoder(): CharsetEncoder
    
    /**
     * Create a new decoder for this charset.
     * Decoders convert byte sequences to character sequences.
     *
     * @return new [CharsetDecoder] instance
     */
    abstract fun newDecoder(): CharsetDecoder
    
    companion object {
        /**
         * Get a charset by name.
         *
         * @param name charset name (case-insensitive)
         * @return [Charset] instance
         * @throws UnsupportedCharsetException if charset is not supported;
         *   call [isSupported] first when the name comes from untrusted or external input
         */
        fun forName(name: String): Charset
        
        /**
         * Check if a charset is supported on this platform.
         *
         * @param charset charset name to check
         * @return true if charset is supported
         */
        fun isSupported(charset: String): Boolean
    }
}

Usage Examples:

import io.ktor.utils.io.charsets.*

// Look charsets up by their registered names
val utf8Charset = Charset.forName("UTF-8")
val latin1Charset = Charset.forName("ISO-8859-1")

// Availability of other charsets varies by platform, so probe before relying on one
val utf16Available = Charset.isSupported("UTF-16")

// A charset is a factory for its encoder and decoder
val utf8Encoder = utf8Charset.newEncoder()
val utf8Decoder = utf8Charset.newDecoder()

println("Charset: ${utf8Charset.name}")

CharsetEncoder Class

Character sequence to byte sequence encoder with configurable encoding options.

/**
 * Encoder for converting character sequences to byte sequences.
 *
 * Platform-specific implementation optimized for the target charset.
 * Obtain an instance from `Charset.newEncoder()`.
 */
abstract class CharsetEncoder {
    /** The charset this encoder converts to. */
    abstract val charset: Charset
    
    /**
     * Encode a character sequence to a byte array.
     *
     * The `[fromIndex, toIndex)` range is expressed in `CharSequence` indices of [input];
     * by default the whole sequence is encoded.
     *
     * @param input character sequence to encode
     * @param fromIndex starting character index (inclusive), defaults to 0
     * @param toIndex ending character index (exclusive), defaults to `input.length`
     * @return encoded byte array
     */
    fun encodeToByteArray(
        input: CharSequence,
        fromIndex: Int = 0,
        toIndex: Int = input.length
    ): ByteArray
    
    /**
     * Encode a character sequence to a ByteReadPacket.
     *
     * Takes the same range parameters as [encodeToByteArray]; only the result container differs.
     *
     * @param input character sequence to encode
     * @param fromIndex starting character index (inclusive), defaults to 0
     * @param toIndex ending character index (exclusive), defaults to `input.length`
     * @return encoded bytes as a packet
     */
    fun encode(
        input: CharSequence,
        fromIndex: Int = 0,
        toIndex: Int = input.length
    ): ByteReadPacket
}

Usage Examples:

import io.ktor.utils.io.charsets.*
import io.ktor.utils.io.core.*

// Obtain a UTF-8 encoder
val utf8 = Charsets.UTF_8.newEncoder()

// Whole string -> ByteArray
val message = "Hello, 世界! 🌍"
val encoded = utf8.encodeToByteArray(message)
println("Encoded ${message.length} characters to ${encoded.size} bytes")

// Only the first five chars -> ByteArray
val prefixBytes = utf8.encodeToByteArray(message, fromIndex = 0, toIndex = 5)

// Whole string -> ByteReadPacket, then consume the packet
val encodedPacket = utf8.encode(message)
val leadingByte = encodedPacket.readByte()
val tail = encodedPacket.readRemaining()

// The same text pushed through two different charsets
val asUtf8 = Charsets.UTF_8.newEncoder().encodeToByteArray("Hello")
val asLatin1 = Charsets.ISO_8859_1.newEncoder().encodeToByteArray("Hello")

println("UTF-8: ${asUtf8.size} bytes")
println("ISO-8859-1: ${asLatin1.size} bytes")

CharsetDecoder Class

Byte sequence to character sequence decoder with error handling and streaming support.

/**
 * Decoder for converting byte sequences to character sequences.
 *
 * Platform-specific implementation with error handling for malformed input.
 * Obtain an instance from `Charset.newDecoder()`.
 */
abstract class CharsetDecoder {
    /** The charset this decoder converts from. */
    abstract val charset: Charset
    
    /**
     * Decode bytes from input to a string.
     *
     * @param input byte input stream
     * @param max maximum characters to decode, defaults to `Int.MAX_VALUE` (no practical limit)
     * @return decoded string
     */
    fun decode(input: Input, max: Int = Int.MAX_VALUE): String
    
    /**
     * Decode bytes from input and append to destination.
     *
     * Unlike the single-argument overload, the decoded text is appended to [dst]
     * and the count of decoded characters is returned instead of the text itself.
     *
     * @param input byte input stream
     * @param dst destination to append decoded characters
     * @param max maximum characters to decode, defaults to `Int.MAX_VALUE` (no practical limit)
     * @return number of characters decoded
     */
    fun decode(input: Input, dst: Appendable, max: Int = Int.MAX_VALUE): Int
    
    /**
     * Decode exactly the specified number of bytes to a string.
     *
     * Note that [inputLength] is a byte count, whereas `max` on [decode] is a character count.
     *
     * @param input byte input stream
     * @param inputLength exact number of bytes to read
     * @return decoded string
     * @throws EOFException if not enough bytes available
     */
    fun decodeExactBytes(input: Input, inputLength: Int): String
}

Usage Examples:

import io.ktor.utils.io.charsets.*
import io.ktor.utils.io.core.*

// One UTF-8 decoder is reused for every call below
val utf8 = Charsets.UTF_8.newDecoder()

// Source bytes; each decode call gets its own packet because reading consumes it
val encoded = "Hello, 世界! 🌍".toByteArray(Charsets.UTF_8)
val wholePacket = ByteReadPacket(encoded)

// Entire packet -> String
val wholeText = utf8.decode(wholePacket)
println("Decoded: $wholeText")

// At most ten characters -> String
val cappedPacket = ByteReadPacket(encoded)
val cappedText = utf8.decode(cappedPacket, max = 10)

// Entire packet -> Appendable, returning the character count
val sink = StringBuilder()
val sinkPacket = ByteReadPacket(encoded)
val appendedCount = utf8.decode(sinkPacket, sink)
println("Decoded $appendedCount characters: $sink")

// A fixed number of bytes -> String
val fourBytePacket = ByteReadPacket("Test".toByteArray())
val fourByteText = utf8.decodeExactBytes(fourBytePacket, inputLength = 4)

// The same ASCII-range bytes decoded with two charsets
val helloBytes = byteArrayOf(0x48, 0x65, 0x6C, 0x6C, 0x6F) // "Hello"

val fromUtf8 = Charsets.UTF_8.newDecoder().decode(ByteReadPacket(helloBytes))
val fromLatin1 = Charsets.ISO_8859_1.newDecoder().decode(ByteReadPacket(helloBytes))

println("UTF-8 decoded: $fromUtf8")
println("ISO-8859-1 decoded: $fromLatin1")

Standard Charsets

Pre-configured charset instances for commonly used character encodings.

/**
 * Standard charset constants for commonly used character encodings.
 *
 * Only the two charsets listed here are exposed as constants; any other charset
 * has to be looked up by name.
 */
object Charsets {
    /** UTF-8 character encoding. */
    val UTF_8: Charset
    
    /** ISO-8859-1 (Latin-1) character encoding. */
    val ISO_8859_1: Charset
}

Usage Examples:

import io.ktor.utils.io.charsets.*

// Grab the predefined charset constants
val utf8Charset = Charsets.UTF_8
val latin1Charset = Charsets.ISO_8859_1

println("UTF-8 name: ${utf8Charset.name}")
println("ISO-8859-1 name: ${latin1Charset.name}")

// Each constant can produce both an encoder and a decoder
val encoderUtf8 = Charsets.UTF_8.newEncoder()
val decoderUtf8 = Charsets.UTF_8.newDecoder()

val encoderLatin1 = Charsets.ISO_8859_1.newEncoder()
val decoderLatin1 = Charsets.ISO_8859_1.newDecoder()

// Encode one sample with both charsets and compare the sizes
val sample = "Hello World"
val sampleAsUtf8 = encoderUtf8.encodeToByteArray(sample)
val sampleAsLatin1 = encoderLatin1.encodeToByteArray(sample)

println("Text: '$sample'")
println("UTF-8 bytes: ${sampleAsUtf8.size}")
println("ISO-8859-1 bytes: ${sampleAsLatin1.size}")

Exception Classes

Exception types for character encoding and decoding error handling.

/**
 * Base exception for malformed input during character encoding/decoding.
 *
 * As declared here it extends [Throwable] directly, not `Exception`, so a
 * `catch (e: Exception)` clause will NOT intercept it; catch this type explicitly.
 *
 * @param message description of the malformed input
 */
abstract class MalformedInputException(message: String) : Throwable(message)

/**
 * Exception thrown when a line is too long during text processing.
 *
 * Extends MalformedInputException for consistent error handling. Because of that
 * inheritance, a `catch` clause for this type must be placed BEFORE any clause for
 * `MalformedInputException`; otherwise the more general clause handles it first.
 *
 * @param message description of the offending line
 */
class TooLongLineException(message: String) : MalformedInputException(message)

Usage Examples:

import io.ktor.utils.io.charsets.*
import io.ktor.utils.io.core.*

// Handle encoding exceptions
/**
 * Encodes [text] with [charset], reporting failure as `null` instead of throwing.
 *
 * Only [MalformedInputException] is intercepted; it is logged to standard output.
 * Any other throwable propagates to the caller.
 *
 * @param text string to encode
 * @param charset target character set
 * @return the encoded bytes, or `null` when encoding raised [MalformedInputException]
 */
fun safeEncode(text: String, charset: Charset): ByteArray? =
    try {
        charset.newEncoder().encodeToByteArray(text)
    } catch (failure: MalformedInputException) {
        println("Failed to encode text: ${failure.message}")
        null
    }

// Handle decoding exceptions
/**
 * Decodes [bytes] with [charset], reporting failure as `null` instead of throwing.
 *
 * Both [TooLongLineException] and the more general [MalformedInputException] are
 * intercepted and logged to standard output with distinct messages. Any other
 * throwable propagates to the caller.
 *
 * @param bytes encoded input
 * @param charset character set the bytes are expected to be in
 * @return the decoded string, or `null` when decoding raised one of the exceptions above
 */
fun safeDecode(bytes: ByteArray, charset: Charset): String? {
    return try {
        val decoder = charset.newDecoder()
        val input = ByteReadPacket(bytes)
        decoder.decode(input)
    } catch (e: TooLongLineException) {
        // Must come first: TooLongLineException is a subclass of MalformedInputException,
        // so the general clause below would otherwise match it and this branch could never run.
        println("Line too long: ${e.message}")
        null
    } catch (e: MalformedInputException) {
        println("Failed to decode bytes: ${e.message}")
        null
    }
}

// Usage
val original = "Hello World"
val encodedOrNull = safeEncode(original, Charsets.UTF_8)
val roundTripped = encodedOrNull?.let { payload -> safeDecode(payload, Charsets.UTF_8) }

// 0xFF and 0xFE never appear in well-formed UTF-8
val invalidUtf8 = byteArrayOf(0xFF.toByte(), 0xFE.toByte())
val decodedOrNull = safeDecode(invalidUtf8, Charsets.UTF_8) // null when the decoder reports malformed input

Text Processing Extensions

Extension functions for common text encoding and decoding operations.

/**
 * Convert string to byte array using the given charset.
 *
 * @param charset character encoding to use (default UTF-8)
 * @return encoded byte array
 */
fun String.toByteArray(charset: Charset = Charsets.UTF_8): ByteArray

/**
 * Convert byte array to string using the given charset.
 *
 * Always pass [charset] explicitly when calling this: an argument-less `bytes.toString()`
 * resolves to the member `Any.toString()` (members take precedence over extensions),
 * which does not decode the bytes.
 *
 * @param charset character encoding to use (default UTF-8)
 * @return decoded string
 */
fun ByteArray.toString(charset: Charset = Charsets.UTF_8): String

/**
 * Write UTF-8 text to output.
 *
 * No line separator is appended; see `writeTextLine` for that.
 *
 * @param text string to write
 */
fun Output.writeText(text: String)

/**
 * Write UTF-8 text followed by line separator.
 *
 * NOTE(review): the exact separator written is not shown in this listing — confirm
 * against the implementation before relying on a specific byte sequence.
 *
 * @param text string to write
 */
fun Output.writeTextLine(text: String)

/**
 * Read UTF-8 text from input.
 *
 * Reading consumes the underlying bytes, so a second call continues where the
 * previous one stopped rather than starting over.
 *
 * @param min minimum characters to read, defaults to 0
 * @param max maximum characters to read, defaults to `Int.MAX_VALUE`
 * @return decoded string
 */
fun Input.readText(min: Int = 0, max: Int = Int.MAX_VALUE): String

/**
 * Read exactly the specified number of UTF-8 characters.
 *
 * The count is in characters, not bytes; a multi-byte character counts once.
 *
 * @param exactCharacters exact number of characters to read
 * @return decoded string
 */
fun Input.readTextExact(exactCharacters: Int): String

Usage Examples:

import io.ktor.utils.io.charsets.*
import io.ktor.utils.io.core.*

// String and byte array conversions
val text = "Hello, 世界!"
val utf8Bytes = text.toByteArray(Charsets.UTF_8)
val decodedFromUtf8 = utf8Bytes.toString(Charsets.UTF_8)

// ISO-8859-1 only covers code points U+0000..U+00FF, so it cannot represent the CJK
// characters above; use a sample that stays inside that range for a lossless round trip
val latin1Text = "Héllo, wörld!"
val latin1Bytes = latin1Text.toByteArray(Charsets.ISO_8859_1)
val decodedFromLatin1 = latin1Bytes.toString(Charsets.ISO_8859_1)

// Writing text to packets
val packet = buildPacket {
    writeText("Line 1")
    writeTextLine("Line 2 with newline")
    writeText("Line 3")
}

// Reading text from packets
val input = ByteReadPacket("Hello World Test".toByteArray())
val allText = input.readText()

// readText() above drained `input`, so a bounded read needs a packet of its own;
// asking an empty packet for `min = 1` characters cannot be satisfied
val limitedInput = ByteReadPacket("Hello World Test".toByteArray())
val limitedText = limitedInput.readText(min = 1, max = 5)

// Exact character reading
val exactInput = ByteReadPacket("Exact".toByteArray())
val exactText = exactInput.readTextExact(5) // Reads exactly 5 characters

// Integration with I/O operations
/**
 * Encodes [text] as UTF-8 and writes every resulting byte to [channel].
 *
 * @param channel destination channel
 * @param text string to send
 */
suspend fun writeTextToChannel(channel: ByteWriteChannel, text: String) {
    text.toByteArray(Charsets.UTF_8).let { payload ->
        channel.writeFully(payload, 0, payload.size)
    }
}

/**
 * Performs a single read of at most [maxBytes] bytes from [channel] and decodes them as UTF-8.
 *
 * Only one `readAvailable` call is made, so fewer than [maxBytes] bytes may be returned
 * even when more data arrives later.
 *
 * NOTE(review): the read can stop in the middle of a multi-byte UTF-8 sequence; how the
 * trailing partial character is decoded depends on the charset implementation — confirm
 * before using this on text that is not pure ASCII.
 *
 * @param channel source channel
 * @param maxBytes upper bound on the number of bytes read
 * @return the decoded text, or an empty string when no bytes were read
 */
suspend fun readTextFromChannel(channel: ByteReadChannel, maxBytes: Int): String {
    val buffer = ByteArray(maxBytes)
    val bytesRead = channel.readAvailable(buffer, 0, buffer.size)
    // readAvailable reports -1 once the channel is closed; copyOf(-1) would throw,
    // so treat "nothing read" as empty text instead.
    if (bytesRead <= 0) return ""
    return buffer.copyOf(bytesRead).toString(Charsets.UTF_8)
}

Advanced Character Encoding Usage

Complex scenarios involving multiple charsets, streaming, and error recovery.

// Multi-charset text processor
/**
 * Decodes byte arrays using a charset selected by name, with a Latin-1 fallback,
 * and offers a simplified heuristic for guessing the charset of raw bytes.
 *
 * Every name returned by [detectEncoding] is accepted by [processText], so the two
 * can be chained: `processText(bytes, detectEncoding(bytes))`.
 */
class MultiCharsetProcessor {
    // Keys are lowercase; lookups lowercase the requested name before matching.
    private val charsets = mapOf(
        "utf-8" to Charsets.UTF_8,
        // ASCII is a strict subset of UTF-8, so ASCII input is decoded with the UTF-8 charset.
        // Without these aliases the "ascii" result of detectEncoding() would be rejected below.
        "ascii" to Charsets.UTF_8,
        "us-ascii" to Charsets.UTF_8,
        "iso-8859-1" to Charsets.ISO_8859_1,
    )

    /**
     * Decodes [input] with the charset registered under [charsetName].
     *
     * If the selected decoder reports malformed input, the same bytes are decoded again
     * as ISO-8859-1.
     *
     * @param input bytes to decode
     * @param charsetName charset name, matched case-insensitively
     * @return decoded text
     * @throws IllegalArgumentException if [charsetName] is not a registered charset
     */
    fun processText(input: ByteArray, charsetName: String): String {
        val charset = charsets[charsetName.lowercase()]
            ?: throw IllegalArgumentException("Unsupported charset: $charsetName")

        return try {
            charset.newDecoder().decode(ByteReadPacket(input))
        } catch (e: MalformedInputException) {
            // Fallback to Latin-1 for binary data; a fresh packet is needed because the
            // failed attempt may already have consumed part of the first one.
            Charsets.ISO_8859_1.newDecoder().decode(ByteReadPacket(input))
        }
    }

    /**
     * Guesses the encoding of [bytes] with a deliberately simplified heuristic.
     *
     * - starts with the UTF-8 byte order mark (EF BB BF) -> "utf-8"
     * - every byte is in the 7-bit range (including empty input) -> "ascii"
     * - anything else -> "iso-8859-1"
     *
     * UTF-8 data without a byte order mark that contains non-ASCII characters is
     * therefore reported as "iso-8859-1".
     *
     * @param bytes raw bytes to inspect
     * @return one of "utf-8", "ascii" or "iso-8859-1"
     */
    fun detectEncoding(bytes: ByteArray): String = when {
        hasUtf8Bom(bytes) -> "utf-8"
        // Kotlin bytes are signed, so values 0x80..0xFF show up as negatives.
        bytes.all { it >= 0 } -> "ascii"
        else -> "iso-8859-1"
    }

    /** Returns true when [bytes] begins with the three-byte UTF-8 byte order mark. */
    private fun hasUtf8Bom(bytes: ByteArray): Boolean =
        bytes.size >= 3 &&
            bytes[0] == 0xEF.toByte() &&
            bytes[1] == 0xBB.toByte() &&
            bytes[2] == 0xBF.toByte()
}

// Streaming text converter
/**
 * Re-encodes a byte stream from [sourceCharset] into [targetCharset] chunk by chunk.
 *
 * @property sourceCharset charset the incoming bytes are decoded with
 * @property targetCharset charset the outgoing bytes are encoded with
 */
class StreamingTextConverter(
    private val sourceCharset: Charset,
    private val targetCharset: Charset
) {
    /**
     * Drains [input], decoding up to 1024 characters at a time and writing the
     * re-encoded packet to [output].
     *
     * When a chunk raises [MalformedInputException], one byte of [input] is discarded
     * (if any remain) and conversion carries on with the next iteration.
     *
     * @param input source of bytes in [sourceCharset]
     * @param output sink receiving bytes in [targetCharset]
     */
    fun convert(input: Input, output: Output) {
        val sourceDecoder = sourceCharset.newDecoder()
        val targetEncoder = targetCharset.newEncoder()

        while (true) {
            if (input.endOfInput) break
            try {
                // Decode, re-encode and forward one chunk
                output.writePacket(targetEncoder.encode(sourceDecoder.decode(input, max = 1024)))
            } catch (malformed: MalformedInputException) {
                // Step over a single byte so the loop can resume past the bad input
                if (!input.endOfInput) input.discard(1)
            }
        }
    }
}

Install with Tessl CLI

npx tessl i tessl/maven-io-ktor--ktor-io-watchosarm64

docs

async-channels.md

byte-order.md

character-encoding.md

index.md

memory-management.md

object-pooling.md

packet-io.md

tile.json