or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

async-channels.md byte-order.md character-encoding.md index.md memory-management.md object-pooling.md packet-io.md

character-encoding.mddocs/

0

# Character Encoding

1

2

Character encoding and decoding support with UTF-8 and ISO-8859-1 charsets, providing encoder/decoder abstractions for text processing.

3

4

## Capabilities

5

6

### Charset Classes

7

8

Abstract character set representations with encoder and decoder factory methods.

9

10

```kotlin { .api }

11

/**

12

* Abstract character set representation.

13

* Platform-specific implementation providing encoding and decoding functionality.

14

*/

15

abstract class Charset {

16

/** Name of the character set (e.g., "UTF-8", "ISO-8859-1") */

17

abstract val name: String

18

19

/**

20

* Create a new encoder for this charset.

21

* Encoders convert character sequences to byte sequences.

22

* @return new CharsetEncoder instance

23

*/

24

abstract fun newEncoder(): CharsetEncoder

25

26

/**

27

* Create a new decoder for this charset.

28

* Decoders convert byte sequences to character sequences.

29

* @return new CharsetDecoder instance

30

*/

31

abstract fun newDecoder(): CharsetDecoder

32

33

companion object {

34

/**

35

* Get a charset by name.

36

* @param name charset name (case-insensitive)

37

* @return Charset instance

38

* @throws UnsupportedCharsetException if charset is not supported

39

*/

40

fun forName(name: String): Charset

41

42

/**

43

* Check if a charset is supported on this platform.

44

* @param charset charset name to check

45

* @return true if charset is supported

46

*/

47

fun isSupported(charset: String): Boolean

48

}

49

}

50

```

51

52

**Usage Examples:**

53

54

```kotlin

55

import io.ktor.utils.io.charsets.*

56

57

// Get charset by name

58

val utf8 = Charset.forName("UTF-8")

59

val latin1 = Charset.forName("ISO-8859-1")

60

61

// Check charset support

62

val isSupported = Charset.isSupported("UTF-16") // Platform-dependent

63

64

// Create encoders and decoders

65

val encoder = utf8.newEncoder()

66

val decoder = utf8.newDecoder()

67

68

println("Charset: ${utf8.name}")

69

```

70

71

### CharsetEncoder Class

72

73

Character sequence to byte sequence encoder with configurable encoding options.

74

75

```kotlin { .api }

76

/**

77

* Encoder for converting character sequences to byte sequences.

78

* Platform-specific implementation optimized for the target charset.

79

*/

80

abstract class CharsetEncoder {

81

/** The charset this encoder converts to */

82

abstract val charset: Charset

83

84

/**

85

* Encode a character sequence to a byte array.

86

* @param input character sequence to encode

87

* @param fromIndex starting character index (inclusive)

88

* @param toIndex ending character index (exclusive)

89

* @return encoded byte array

90

*/

91

fun encodeToByteArray(

92

input: CharSequence,

93

fromIndex: Int = 0,

94

toIndex: Int = input.length

95

): ByteArray

96

97

/**

98

* Encode a character sequence to a ByteReadPacket.

99

* @param input character sequence to encode

100

* @param fromIndex starting character index (inclusive)

101

* @param toIndex ending character index (exclusive)

102

* @return encoded bytes as a packet

103

*/

104

fun encode(

105

input: CharSequence,

106

fromIndex: Int = 0,

107

toIndex: Int = input.length

108

): ByteReadPacket

109

}

110

```

111

112

**Usage Examples:**

113

114

```kotlin

115

import io.ktor.utils.io.charsets.*

116

import io.ktor.utils.io.core.*

117

118

// Encode text to bytes

119

val encoder = Charsets.UTF_8.newEncoder()

120

121

// Encode to byte array

122

val text = "Hello, 世界! 🌍"

123

val bytes = encoder.encodeToByteArray(text)

124

println("Encoded ${text.length} characters to ${bytes.size} bytes")

125

126

// Encode partial text

127

val partialBytes = encoder.encodeToByteArray(text, fromIndex = 0, toIndex = 5)

128

129

// Encode to packet

130

val packet = encoder.encode(text)

131

val firstByte = packet.readByte()

132

val remainingBytes = packet.readRemaining()

133

134

// Encode with different charsets

135

val utf8Encoder = Charsets.UTF_8.newEncoder()

136

val latin1Encoder = Charsets.ISO_8859_1.newEncoder()

137

138

val utf8Bytes = utf8Encoder.encodeToByteArray("Hello")

139

val latin1Bytes = latin1Encoder.encodeToByteArray("Hello")

140

141

println("UTF-8: ${utf8Bytes.size} bytes")

142

println("ISO-8859-1: ${latin1Bytes.size} bytes")

143

```

144

145

### CharsetDecoder Class

146

147

Byte sequence to character sequence decoder with error handling and streaming support.

148

149

```kotlin { .api }

150

/**

151

* Decoder for converting byte sequences to character sequences.

152

* Platform-specific implementation with error handling for malformed input.

153

*/

154

abstract class CharsetDecoder {

155

/** The charset this decoder converts from */

156

abstract val charset: Charset

157

158

/**

159

* Decode bytes from input to a string.

160

* @param input byte input stream

161

* @param max maximum characters to decode

162

* @return decoded string

163

*/

164

fun decode(input: Input, max: Int = Int.MAX_VALUE): String

165

166

/**

167

* Decode bytes from input and append to destination.

168

* @param input byte input stream

169

* @param dst destination to append decoded characters

170

* @param max maximum characters to decode

171

* @return number of characters decoded

172

*/

173

fun decode(input: Input, dst: Appendable, max: Int = Int.MAX_VALUE): Int

174

175

/**

176

* Decode exactly the specified number of bytes to a string.

177

* @param input byte input stream

178

* @param inputLength exact number of bytes to read

179

* @return decoded string

180

* @throws EOFException if not enough bytes available

181

*/

182

fun decodeExactBytes(input: Input, inputLength: Int): String

183

}

184

```

185

186

**Usage Examples:**

187

188

```kotlin

189

import io.ktor.utils.io.charsets.*

190

import io.ktor.utils.io.core.*

191

192

// Decode bytes to text

193

val decoder = Charsets.UTF_8.newDecoder()

194

195

// Create input from byte array

196

val bytes = "Hello, 世界! 🌍".toByteArray(Charsets.UTF_8)

197

val input = ByteReadPacket(bytes)

198

199

// Decode to string

200

val decodedText = decoder.decode(input)

201

println("Decoded: $decodedText")

202

203

// Decode with character limit

204

val limitedInput = ByteReadPacket(bytes)

205

val partialText = decoder.decode(limitedInput, max = 10)

206

207

// Decode to appendable

208

val output = StringBuilder()

209

val bytesInput = ByteReadPacket(bytes)

210

val charactersDecoded = decoder.decode(bytesInput, output)

211

println("Decoded $charactersDecoded characters: ${output}")

212

213

// Decode exact byte count

214

val exactInput = ByteReadPacket("Test".toByteArray())

215

val exactText = decoder.decodeExactBytes(exactInput, inputLength = 4)

216

217

// Handle different encodings

218

val utf8Decoder = Charsets.UTF_8.newDecoder()

219

val latin1Decoder = Charsets.ISO_8859_1.newDecoder()

220

221

val testBytes = byteArrayOf(0x48, 0x65, 0x6C, 0x6C, 0x6F) // "Hello"

222

223

val utf8Result = utf8Decoder.decode(ByteReadPacket(testBytes))

224

val latin1Result = latin1Decoder.decode(ByteReadPacket(testBytes))

225

226

println("UTF-8 decoded: $utf8Result")

227

println("ISO-8859-1 decoded: $latin1Result")

228

```

229

230

### Standard Charsets

231

232

Pre-configured charset instances for commonly used character encodings.

233

234

```kotlin { .api }

235

/**

236

* Standard charset constants for commonly used character encodings.

237

*/

238

object Charsets {

239

/** UTF-8 character encoding */

240

val UTF_8: Charset

241

242

/** ISO-8859-1 (Latin-1) character encoding */

243

val ISO_8859_1: Charset

244

}

245

```

246

247

**Usage Examples:**

248

249

```kotlin

250

import io.ktor.utils.io.charsets.*

251

252

// Use standard charsets

253

val utf8 = Charsets.UTF_8

254

val latin1 = Charsets.ISO_8859_1

255

256

println("UTF-8 name: ${utf8.name}")

257

println("ISO-8859-1 name: ${latin1.name}")

258

259

// Create encoders for standard charsets

260

val utf8Encoder = Charsets.UTF_8.newEncoder()

261

val utf8Decoder = Charsets.UTF_8.newDecoder()

262

263

val latin1Encoder = Charsets.ISO_8859_1.newEncoder()

264

val latin1Decoder = Charsets.ISO_8859_1.newDecoder()

265

266

// Compare encoding results

267

val text = "Hello World"

268

val utf8Bytes = utf8Encoder.encodeToByteArray(text)

269

val latin1Bytes = latin1Encoder.encodeToByteArray(text)

270

271

println("Text: '$text'")

272

println("UTF-8 bytes: ${utf8Bytes.size}")

273

println("ISO-8859-1 bytes: ${latin1Bytes.size}")

274

```

275

276

### Exception Classes

277

278

Exception types for character encoding and decoding error handling.

279

280

```kotlin { .api }

281

/**

282

* Base exception for malformed input during character encoding/decoding.

283

*/

284

abstract class MalformedInputException(message: String) : Throwable(message)

285

286

/**

287

* Exception thrown when a line is too long during text processing.

288

* Extends MalformedInputException for consistent error handling.

289

*/

290

class TooLongLineException(message: String) : MalformedInputException(message)

291

```

292

293

**Usage Examples:**

294

295

```kotlin

296

import io.ktor.utils.io.charsets.*

297

import io.ktor.utils.io.core.*

298

299

// Handle encoding exceptions

300

/**
 * Encodes [text] with a fresh encoder obtained from [charset].
 *
 * @param text the string to encode
 * @param charset the charset that supplies the encoder
 * @return the encoded bytes, or `null` when encoding fails with [MalformedInputException]
 */
fun safeEncode(text: String, charset: Charset): ByteArray? =
    try {
        charset.newEncoder().encodeToByteArray(text)
    } catch (e: MalformedInputException) {
        println("Failed to encode text: ${e.message}")
        null
    }

309

310

// Handle decoding exceptions

311

/**
 * Decodes [bytes] with a fresh decoder obtained from [charset].
 *
 * @param bytes the raw bytes to decode
 * @param charset the charset that supplies the decoder
 * @return the decoded text, or `null` when decoding fails
 */
fun safeDecode(bytes: ByteArray, charset: Charset): String? {
    return try {
        val decoder = charset.newDecoder()
        val input = ByteReadPacket(bytes)
        decoder.decode(input)
    } catch (e: TooLongLineException) {
        // Must come before the MalformedInputException handler: TooLongLineException
        // is a subclass of it, so the broader handler would otherwise always win and
        // this branch would be unreachable.
        println("Line too long: ${e.message}")
        null
    } catch (e: MalformedInputException) {
        println("Failed to decode bytes: ${e.message}")
        null
    }
}

324

325

// Usage

326

val validText = "Hello World"

327

val validBytes = safeEncode(validText, Charsets.UTF_8)

328

val decodedText = validBytes?.let { safeDecode(it, Charsets.UTF_8) }

329

330

// Handle malformed input

331

val malformedBytes = byteArrayOf(0xFF.toByte(), 0xFE.toByte()) // Invalid UTF-8

332

val result = safeDecode(malformedBytes, Charsets.UTF_8) // Returns null

333

```

334

335

### Text Processing Extensions

336

337

Extension functions for common text encoding and decoding operations.

338

339

```kotlin { .api }

340

/**

341

* Convert string to byte array using the given charset (UTF-8 by default).

342

* @param charset character encoding to use (default UTF-8)

343

* @return encoded byte array

344

*/

345

fun String.toByteArray(charset: Charset = Charsets.UTF_8): ByteArray

346

347

/**

348

* Convert byte array to string using the given charset (UTF-8 by default).

349

* @param charset character encoding to use (default UTF-8)

350

* @return decoded string

351

*/

352

fun ByteArray.toString(charset: Charset = Charsets.UTF_8): String

353

354

/**

355

* Write UTF-8 text to output.

356

* @param text string to write

357

*/

358

fun Output.writeText(text: String)

359

360

/**

361

* Write UTF-8 text followed by line separator.

362

* @param text string to write

363

*/

364

fun Output.writeTextLine(text: String)

365

366

/**

367

* Read UTF-8 text from input.

368

* @param min minimum characters to read

369

* @param max maximum characters to read

370

* @return decoded string

371

*/

372

fun Input.readText(min: Int = 0, max: Int = Int.MAX_VALUE): String

373

374

/**

375

* Read exactly the specified number of UTF-8 characters.

376

* @param exactCharacters exact number of characters to read

377

* @return decoded string

378

*/

379

fun Input.readTextExact(exactCharacters: Int): String

380

```

381

382

**Usage Examples:**

383

384

```kotlin

385

import io.ktor.utils.io.charsets.*

386

import io.ktor.utils.io.core.*

387

388

// String and byte array conversions

389

val text = "Hello, 世界!"

390

val utf8Bytes = text.toByteArray(Charsets.UTF_8)

391

val latin1Bytes = text.toByteArray(Charsets.ISO_8859_1)

392

393

val decodedFromUtf8 = utf8Bytes.toString(Charsets.UTF_8)

394

val decodedFromLatin1 = latin1Bytes.toString(Charsets.ISO_8859_1)

395

396

// Writing text to packets

397

val packet = buildPacket {

398

writeText("Line 1")

399

writeTextLine("Line 2 with newline")

400

writeText("Line 3")

401

}

402

403

// Reading text from packets

404

val input = ByteReadPacket("Hello World Test".toByteArray())
val allText = input.readText()

// readText() with default arguments drains `input`, so the bounded read below
// needs its own packet; asking an exhausted input for min = 1 characters cannot succeed.
val limitedInput = ByteReadPacket("Hello World Test".toByteArray())
val limitedText = limitedInput.readText(min = 1, max = 5)

407

408

// Exact character reading

409

val exactInput = ByteReadPacket("Exact".toByteArray())

410

val exactText = exactInput.readTextExact(5) // Reads exactly 5 characters

411

412

// Integration with I/O operations

413

/**
 * Encodes [text] as UTF-8 and writes all of the resulting bytes to [channel].
 *
 * @param channel destination channel
 * @param text string to encode and write
 */
suspend fun writeTextToChannel(channel: ByteWriteChannel, text: String) {
    text.toByteArray(Charsets.UTF_8).let { encoded ->
        channel.writeFully(encoded, 0, encoded.size)
    }
}

417

418

/**
 * Performs a single read of at most [maxBytes] bytes from [channel] and decodes
 * the bytes that were read as UTF-8.
 *
 * @param channel source channel
 * @param maxBytes size of the read buffer, i.e. the upper bound on bytes consumed
 * @return the decoded text, or an empty string when nothing was read
 */
suspend fun readTextFromChannel(channel: ByteReadChannel, maxBytes: Int): String {
    val buffer = ByteArray(maxBytes)
    val bytesRead = channel.readAvailable(buffer, 0, buffer.size)
    // readAvailable reports -1 once the channel is closed; copyOf(-1) would throw,
    // so treat "nothing read" as empty text instead of crashing.
    if (bytesRead <= 0) return ""
    // NOTE(review): a single read may stop in the middle of a multi-byte UTF-8
    // sequence; callers that need exact text should read until the channel closes.
    return buffer.copyOf(bytesRead).toString(Charsets.UTF_8)
}

423

```

424

425

### Advanced Character Encoding Usage

426

427

Complex scenarios involving multiple charsets, streaming, and error recovery.

428

429

```kotlin { .api }

430

// Multi-charset text processor

431

/**
 * Decodes byte arrays with a charset selected by name, falling back to
 * ISO-8859-1 when the input is malformed for the requested charset.
 */
class MultiCharsetProcessor {
    // Keys are lowercase; processText lowercases the requested name before lookup.
    private val charsets = mapOf(
        "utf-8" to Charsets.UTF_8,
        // detectEncoding can return "ascii". ASCII is a strict subset of UTF-8, so
        // the UTF-8 charset decodes it and the detected name is accepted by processText.
        "ascii" to Charsets.UTF_8,
        "iso-8859-1" to Charsets.ISO_8859_1
    )

    /**
     * Decodes [input] using the charset registered under [charsetName].
     *
     * @param input raw bytes to decode
     * @param charsetName charset name, matched case-insensitively
     * @return the decoded text
     * @throws IllegalArgumentException if [charsetName] is not a registered charset
     */
    fun processText(input: ByteArray, charsetName: String): String {
        val charset = charsets[charsetName.lowercase()]
            ?: throw IllegalArgumentException("Unsupported charset: $charsetName")

        val decoder = charset.newDecoder()
        val byteInput = ByteReadPacket(input)

        return try {
            decoder.decode(byteInput)
        } catch (e: MalformedInputException) {
            // Fallback to Latin-1 for binary data; decode from a fresh packet
            // because the first attempt may have consumed part of byteInput.
            val fallbackDecoder = Charsets.ISO_8859_1.newDecoder()
            val fallbackInput = ByteReadPacket(input)
            fallbackDecoder.decode(fallbackInput)
        }
    }

    /**
     * Simplified encoding detection.
     *
     * @param bytes raw bytes to inspect
     * @return "utf-8" when [bytes] starts with the EF BB BF byte-order mark,
     * "ascii" when every byte is non-negative (including the empty array),
     * otherwise "iso-8859-1"
     */
    fun detectEncoding(bytes: ByteArray): String {
        return when {
            bytes.size >= 3 &&
                bytes[0] == 0xEF.toByte() &&
                bytes[1] == 0xBB.toByte() &&
                bytes[2] == 0xBF.toByte() -> "utf-8"

            bytes.all { it >= 0 } -> "ascii"
            else -> "iso-8859-1"
        }
    }
}

467

468

// Streaming text converter

469

/**
 * Re-encodes text from [sourceCharset] to [targetCharset] while copying it
 * from an [Input] to an [Output].
 */
class StreamingTextConverter(
    private val sourceCharset: Charset,
    private val targetCharset: Charset
) {
    /**
     * Decodes [input] in chunks of at most 1024 characters, encodes each chunk
     * with the target charset and writes it to [output] until [input] is exhausted.
     * When a chunk fails with [MalformedInputException], one byte is discarded
     * (if any input remains) and the loop continues.
     */
    fun convert(input: Input, output: Output) {
        val sourceDecoder = sourceCharset.newDecoder()
        val targetEncoder = targetCharset.newEncoder()

        while (!input.endOfInput) {
            try {
                val chunk = sourceDecoder.decode(input, max = 1024)
                output.writePacket(targetEncoder.encode(chunk))
            } catch (e: MalformedInputException) {
                // Skip invalid bytes; nothing left to skip once the input is drained.
                if (input.endOfInput) continue
                input.discard(1)
            }
        }
    }
}

491

```