Character encoding and decoding support with charset management for text processing operations in byte channels.
Core charset handling and registry for text encoding operations.
/** Base class for character sets */
expect abstract class Charset
/** Registry of supported character sets */
expect object Charsets {
/** UTF-8 character set */
val UTF_8: Charset
/** ISO 8859-1 (Latin-1) character set */
val ISO_8859_1: Charset
}
/** Check if a charset name is supported */
expect fun Charsets.isSupported(name: String): Boolean
/** Get charset by name */
expect fun Charsets.forName(name: String): Charset
/** Get the name of a charset */
expect val Charset.name: String

Usage Examples:
import io.ktor.utils.io.charsets.*
/**
 * Shows the ways to obtain a charset: the predefined registry constants, a lookup
 * by name guarded by a support check, and an optional lookup that yields `null`
 * when the requested name is not supported.
 */
fun charsetExamples() {
    // Predefined charsets exposed by the registry
    val utf8 = Charsets.UTF_8
    val latin1 = Charsets.ISO_8859_1

    // Look a charset up by name only after confirming it is supported
    val utf16Supported = Charsets.isSupported("UTF-16")
    if (utf16Supported) {
        val utf16 = Charsets.forName("UTF-16")
        println("UTF-16 is supported")
    }

    // Direct lookup by a common charset name
    val ascii = Charsets.forName("US-ASCII")

    // Optional charset: null when the name is not supported
    val windows1252 = when {
        Charsets.isSupported("windows-1252") -> Charsets.forName("windows-1252")
        else -> null
    }
}

Character encoding and decoding implementations for converting between text and bytes.
/** Encodes character sequences to byte sequences */
expect abstract class CharsetEncoder
/** Decodes byte sequences to character sequences */
expect abstract class CharsetDecoder
/** Encode a character sequence to a Source */
fun CharsetEncoder.encode(input: CharSequence): Source
/** Decode bytes from a Source with maximum character limit */
fun CharsetDecoder.decode(input: Source, max: Int): String

Usage Examples:
import io.ktor.utils.io.charsets.*
import kotlinx.io.*
/**
 * Shows a text -> bytes -> text round trip with a UTF-8 encoder/decoder pair,
 * followed by encoding the same way with an ISO 8859-1 encoder.
 */
fun encodingExamples() {
    // Encoder/decoder pair created from one charset
    val utf8 = Charsets.UTF_8
    val utf8Encoder = utf8.newEncoder()
    val utf8Decoder = utf8.newDecoder()

    // Round trip: encode the text, then decode the resulting source
    val encodedSource = utf8Encoder.encode("Hello, 世界!")
    println(utf8Decoder.decode(encodedSource, max = 1000)) // "Hello, 世界!"

    // The same encode call with an encoder from a different charset
    val latin1Encoded = Charsets.ISO_8859_1.newEncoder().encode("Café")
}

Character encoding operations integrated with byte channels.
/** Create a ByteReadChannel from text with specific charset */
fun ByteReadChannel(text: String, charset: Charset = Charsets.UTF_8): ByteReadChannel
/** Write a string using UTF-8 encoding */
suspend fun ByteWriteChannel.writeStringUtf8(value: String)
/** Write a string using platform default encoding */
suspend fun ByteWriteChannel.writeString(value: String)
/** Read a UTF-8 line from the channel */
suspend fun ByteReadChannel.readUTF8Line(max: Int = Int.MAX_VALUE): String?
/** Read a UTF-8 line into an Appendable */
suspend fun ByteReadChannel.readUTF8LineTo(
out: Appendable,
max: Int = Int.MAX_VALUE
): Boolean
/** Read a UTF-8 line with specific line ending mode */
suspend fun ByteReadChannel.readUTF8LineTo(
out: Appendable,
max: Int,
lineEnding: LineEndingMode
): Boolean

Usage Examples:
import io.ktor.utils.io.*
import io.ktor.utils.io.charsets.*
/**
 * Shows text I/O on byte channels: creating read channels from strings with an
 * explicit charset, writing strings to a channel, reading line by line, and
 * accumulating lines into a [StringBuilder].
 */
suspend fun channelEncodingExamples() {
    // Read channels built from text with an explicit charset
    val utf8Channel = ByteReadChannel("Hello, UTF-8!", Charsets.UTF_8)
    val latin1Channel = ByteReadChannel("Café", Charsets.ISO_8859_1)

    // Writing strings to a channel
    val writeChannel = ByteChannel()
    writeChannel.writeStringUtf8("UTF-8 text: 你好")
    writeChannel.writeString("Platform encoding text")

    // Line-by-line reading; a null line ends the loop
    val lineSource = ByteReadChannel("Line 1\nLine 2\nLine 3")
    while (!lineSource.isClosedForRead) {
        val next = lineSource.readUTF8Line() ?: break
        println("Read: $next")
    }

    // Accumulate every line into one StringBuilder, re-adding the separator
    val collected = StringBuilder()
    val textChannel = ByteReadChannel("Multi\nline\ntext")
    while (true) {
        val gotLine = textChannel.readUTF8LineTo(collected)
        if (!gotLine) break
        collected.append("\n")
    }
    println("All text: $collected")
}

Support for different line ending modes in text processing.
/**
* Represents different line ending modes for text processing
*/
value class LineEndingMode {
companion object {
/** Carriage return (\r) */
val CR: LineEndingMode
/** Line feed (\n) */
val LF: LineEndingMode
/** Carriage return + line feed (\r\n) */
val CRLF: LineEndingMode
/** Any line ending mode */
val Any: LineEndingMode
}
/** Check if this mode contains another mode */
operator fun contains(other: LineEndingMode): Boolean
/** Combine line ending modes */
operator fun plus(other: LineEndingMode): LineEndingMode
}

Usage Examples:
import io.ktor.utils.io.*
/**
 * Shows reading text that mixes several line endings with an explicit
 * [LineEndingMode], then testing and combining modes with `in` and `+`.
 */
suspend fun lineEndingExamples() {
    // Input mixing \r\n, \n and \r terminators
    val channel = ByteReadChannel("Line 1\r\nLine 2\nLine 3\rLine 4")
    val joined = StringBuilder()

    // Read with an explicit line ending mode, separating the lines with " | "
    while (true) {
        val gotLine = channel.readUTF8LineTo(joined, max = 1000, lineEnding = LineEndingMode.Any)
        if (!gotLine) break
        joined.append(" | ")
    }
    println("Lines: $joined")

    // Membership check between modes
    val crlfMode = LineEndingMode.CRLF
    val anyMode = LineEndingMode.Any
    val anyContainsCr = anyMode.contains(LineEndingMode.CR)
    if (anyContainsCr) {
        println("Any mode contains CR")
    }

    // Combining two modes into one
    val combined = LineEndingMode.CR + LineEndingMode.LF
    println("Combined mode: $combined")
}

Exceptions related to character encoding operations.
/** Exception thrown when input contains malformed character sequences */
expect open class MalformedInputException : Exception
/** Exception thrown when a line exceeds maximum length */
class TooLongLineException(message: String) : Exception(message)

Usage Examples:
import io.ktor.utils.io.*
import io.ktor.utils.io.charsets.*
/**
 * Shows how to catch the two exception types related to text processing:
 * [TooLongLineException] around a length-limited line read, and
 * [MalformedInputException] around decoding work.
 */
suspend fun exceptionHandlingExamples() {
    // Length-limited read: the input line is longer than the limit passed as `max`
    try {
        val longLineChannel = ByteReadChannel("Very long line without breaks...")
        // May throw TooLongLineException when the line is too long
        val firstLine = longLineChannel.readUTF8Line(max = 10)
    } catch (tooLong: TooLongLineException) {
        println("Line too long: ${tooLong.message}")
    }

    // Decoding: how malformed input is handled depends on the charset implementation
    try {
        val utf8Decoder = Charsets.UTF_8.newDecoder()
        // ... decoding operations that might fail
    } catch (malformed: MalformedInputException) {
        println("Malformed input: ${malformed.message}")
    }
}

// UTF-8 (default for most operations)
val utf8 = Charsets.UTF_8
// Latin-1 (ISO 8859-1)
val latin1 = Charsets.ISO_8859_1
// Platform-specific charsets
val ascii = Charsets.forName("US-ASCII")
val utf16 = Charsets.forName("UTF-16")
val windows1252 = Charsets.forName("windows-1252")

// Create channels with specific encoding
val channel = ByteReadChannel(text, Charsets.UTF_8)
// Process line by line. readUTF8Line returns a nullable String; a null result is
// treated as "no more lines" and ends the loop instead of being silently ignored,
// so the loop never keeps polling a channel that has nothing left to return.
while (!channel.isClosedForRead) {
    val line = channel.readUTF8Line(max = 8192) ?: break
    processLine(line)
}
// Write with encoding
writeChannel.writeStringUtf8("UTF-8 content")

// Check charset support
// Resolve the charset once, as an expression, so the result stays in scope for the
// code that follows (a `val` declared inside each branch would be discarded as soon
// as the branch ends). Falls back to UTF-8 when the requested name is not supported.
val charset = if (Charsets.isSupported("CHARSET_NAME")) {
    Charsets.forName("CHARSET_NAME")
} else {
    Charsets.UTF_8
}
// Use charset
// Handle line length limits: pass the limit as `max` and catch the exception that
// signals an over-long line.
// NOTE(review): `channel` and `maxLineLength` are defined outside this fragment.
try {
// `line` is only visible inside this try block; consume it here.
val line = channel.readUTF8Line(max = maxLineLength)
} catch (e: TooLongLineException) {
// Handle or skip long lines
}