or run

npx @tessl/cli init
Log in

Version

Title

Overview

Evals

Files

Files

docs

batch-operations.md, caching.md, chat-sessions.md, client-configuration.md, content-generation.md, embeddings-tokens.md, error-handling.md, file-search-stores.md, files-management.md, image-operations.md, index.md, live-sessions.md, model-tuning.md, operations.md, tools-functions.md, types-reference.md, video-generation.md

docs/embeddings-tokens.md

0

# Embeddings and Token Operations

1

2

Generate embeddings for text and manage token counting and computation for prompt optimization.

3

4

## Core Imports

5

6

```java

7

import com.google.genai.Models;

8

import com.google.genai.AsyncModels;

9

import com.google.genai.LocalTokenizer;

10

import com.google.genai.types.EmbedContentResponse;

11

import com.google.genai.types.EmbedContentConfig;

12

import com.google.genai.types.ContentEmbedding;

13

import com.google.genai.types.CountTokensResponse;

14

import com.google.genai.types.CountTokensConfig;

15

import com.google.genai.types.ComputeTokensResponse;

16

import com.google.genai.types.ComputeTokensConfig;

17

import java.util.concurrent.CompletableFuture;

18

```

19

20

## Embeddings

21

22

### Models Service - Embeddings

23

24

```java { .api }

25

package com.google.genai;

26

27

public final class Models {

28

// Single text embedding

29

public EmbedContentResponse embedContent(

30

String model,

31

String text,

32

EmbedContentConfig config);

33

34

// Multiple texts embedding

35

public EmbedContentResponse embedContent(

36

String model,

37

List<String> texts,

38

EmbedContentConfig config);

39

}

40

```

41

42

### Async Models Service - Embeddings

43

44

```java { .api }

45

package com.google.genai;

46

47

public final class AsyncModels {

48

public CompletableFuture<EmbedContentResponse> embedContent(

49

String model,

50

String text,

51

EmbedContentConfig config);

52

53

public CompletableFuture<EmbedContentResponse> embedContent(

54

String model,

55

List<String> texts,

56

EmbedContentConfig config);

57

}

58

```

59

60

### Embed Content Config

61

62

```java { .api }

63

package com.google.genai.types;

64

65

public final class EmbedContentConfig {

66

public static Builder builder();

67

68

public Optional<String> taskType();

69

public Optional<String> title();

70

public Optional<Integer> outputDimensionality();

71

public Optional<String> mimeType();

72

public Optional<Boolean> autoTruncate();

73

public Optional<HttpOptions> httpOptions();

74

}

75

```

76

77

**Task Types:**

78

- `RETRIEVAL_QUERY` - For search queries

79

- `RETRIEVAL_DOCUMENT` - For documents to be searched

80

- `SEMANTIC_SIMILARITY` - For similarity comparison

81

- `CLASSIFICATION` - For text classification

82

- `CLUSTERING` - For text clustering

83

84

### Embed Content Response

85

86

```java { .api }

87

package com.google.genai.types;

88

89

public final class EmbedContentResponse {

90

public Optional<ContentEmbedding> embedding();

91

public Optional<List<ContentEmbedding>> embeddings();

92

public Optional<HttpResponse> sdkHttpResponse();

93

}

94

```

95

96

### Content Embedding

97

98

```java { .api }

99

package com.google.genai.types;

100

101

public final class ContentEmbedding {

102

public Optional<List<Float>> values();

103

public Optional<ContentEmbeddingStatistics> statistics();

104

}

105

```

106

107

### Content Embedding Statistics

108

109

```java { .api }

110

package com.google.genai.types;

111

112

public final class ContentEmbeddingStatistics {

113

public Optional<Integer> tokenCount();

114

public Optional<Boolean> truncated();

115

}

116

```

117

118

### Basic Embedding Usage

119

120

```java

121

import com.google.genai.Client;

122

import com.google.genai.types.EmbedContentResponse;

123

124

Client client = new Client();

125

126

// Single text embedding

127

EmbedContentResponse response = client.models.embedContent(

128

"text-embedding-004",

129

"Why is the sky blue?",

130

null

131

);

132

133

// Access embedding values

134

response.embedding().ifPresent(embedding -> {

135

embedding.values().ifPresent(values -> {

136

System.out.println("Embedding dimension: " + values.size());

137

System.out.println("First few values: " + values.subList(0, 5));

138

});

139

});

140

```

141

142

### Multiple Text Embeddings

143

144

```java

145

import com.google.common.collect.ImmutableList;

146

import com.google.genai.types.ContentEmbedding;

147

148

List<String> texts = ImmutableList.of(

149

"What is machine learning?",

150

"How does AI work?",

151

"Explain neural networks"

152

);

153

154

EmbedContentResponse response = client.models.embedContent(

155

"text-embedding-004",

156

texts,

157

null

158

);

159

160

// Access multiple embeddings

161

response.embeddings().ifPresent(embeddings -> {

162

System.out.println("Generated " + embeddings.size() + " embeddings");

163

for (int i = 0; i < embeddings.size(); i++) {

164

ContentEmbedding emb = embeddings.get(i);

165

System.out.println("Text " + (i + 1) + " embedding dimension: " +

166

emb.values().map(List::size).orElse(0));

167

}

168

});

169

```

170

171

### Embedding with Configuration

172

173

```java

174

import com.google.genai.types.EmbedContentConfig;

175

176

EmbedContentConfig config = EmbedContentConfig.builder()

177

.taskType("RETRIEVAL_DOCUMENT")

178

.title("Document about AI")

179

.outputDimensionality(256) // Reduce dimensionality

180

.autoTruncate(true)

181

.build();

182

183

EmbedContentResponse response = client.models.embedContent(

184

"text-embedding-004",

185

"This is a long document about artificial intelligence...",

186

config

187

);

188

189

// Check if truncated

190

response.embedding().ifPresent(embedding -> {

191

embedding.statistics().ifPresent(stats -> {

192

if (stats.truncated().orElse(false)) {

193

System.out.println("Input was truncated");

194

}

195

System.out.println("Token count: " + stats.tokenCount().orElse(0));

196

});

197

});

198

```

199

200

### Embeddings for Search

201

202

```java

203

import com.google.genai.types.EmbedContentConfig;

204

205

// Embed query

206

EmbedContentConfig queryConfig = EmbedContentConfig.builder()

207

.taskType("RETRIEVAL_QUERY")

208

.build();

209

210

EmbedContentResponse queryResponse = client.models.embedContent(

211

"text-embedding-004",

212

"What is the capital of France?",

213

queryConfig

214

);

215

216

// Embed documents

217

EmbedContentConfig docConfig = EmbedContentConfig.builder()

218

.taskType("RETRIEVAL_DOCUMENT")

219

.build();

220

221

List<String> documents = ImmutableList.of(

222

"Paris is the capital and largest city of France.",

223

"London is the capital city of England.",

224

"Berlin is the capital and largest city of Germany."

225

);

226

227

EmbedContentResponse docsResponse = client.models.embedContent(

228

"text-embedding-004",

229

documents,

230

docConfig

231

);

232

233

// Now compute similarity between query and documents

234

List<Float> queryEmbedding = queryResponse.embedding()

235

.flatMap(ContentEmbedding::values)

236

.orElse(ImmutableList.of());

237

238

docsResponse.embeddings().ifPresent(docEmbeddings -> {

239

for (int i = 0; i < docEmbeddings.size(); i++) {

240

List<Float> docEmbedding = docEmbeddings.get(i).values().orElse(ImmutableList.of());

241

double similarity = cosineSimilarity(queryEmbedding, docEmbedding);

242

System.out.println("Document " + (i + 1) + " similarity: " + similarity);

243

}

244

});

245

246

// Helper method for cosine similarity

247

private static double cosineSimilarity(List<Float> a, List<Float> b) {

248

double dotProduct = 0.0;

249

double normA = 0.0;

250

double normB = 0.0;

251

for (int i = 0; i < a.size(); i++) {

252

dotProduct += a.get(i) * b.get(i);

253

normA += a.get(i) * a.get(i);

254

normB += b.get(i) * b.get(i);

255

}

256

return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

257

}

258

```

259

260

### Async Embeddings

261

262

```java

263

import java.util.concurrent.CompletableFuture;

264

265

CompletableFuture<EmbedContentResponse> future = client.async.models.embedContent(

266

"text-embedding-004",

267

"Async embedding text",

268

null

269

);

270

271

future.thenAccept(response -> {

272

response.embedding().ifPresent(embedding -> {

273

System.out.println("Embedding size: " +

274

embedding.values().map(List::size).orElse(0));

275

});

276

});

277

```

278

279

## Token Operations

280

281

### Models Service - Token Operations

282

283

```java { .api }

284

package com.google.genai;

285

286

public final class Models {

287

// Count tokens

288

public CountTokensResponse countTokens(

289

String model,

290

String text,

291

CountTokensConfig config);

292

293

public CountTokensResponse countTokens(

294

String model,

295

List<Content> contents,

296

CountTokensConfig config);

297

298

// Compute tokens (Vertex AI only)

299

public ComputeTokensResponse computeTokens(

300

String model,

301

String text,

302

ComputeTokensConfig config);

303

304

public ComputeTokensResponse computeTokens(

305

String model,

306

List<Content> contents,

307

ComputeTokensConfig config);

308

}

309

```

310

311

### Async Models Service - Token Operations

312

313

```java { .api }

314

package com.google.genai;

315

316

public final class AsyncModels {

317

public CompletableFuture<CountTokensResponse> countTokens(

318

String model,

319

String text,

320

CountTokensConfig config);

321

322

public CompletableFuture<CountTokensResponse> countTokens(

323

String model,

324

List<Content> contents,

325

CountTokensConfig config);

326

327

public CompletableFuture<ComputeTokensResponse> computeTokens(

328

String model,

329

String text,

330

ComputeTokensConfig config);

331

332

public CompletableFuture<ComputeTokensResponse> computeTokens(

333

String model,

334

List<Content> contents,

335

ComputeTokensConfig config);

336

}

337

```

338

339

### Count Tokens Config

340

341

```java { .api }

342

package com.google.genai.types;

343

344

public final class CountTokensConfig {

345

public static Builder builder();

346

347

public Optional<GenerateContentConfig> generateContentConfig();

348

public Optional<HttpOptions> httpOptions();

349

}

350

```

351

352

### Count Tokens Response

353

354

```java { .api }

355

package com.google.genai.types;

356

357

public final class CountTokensResponse {

358

public Optional<Integer> totalTokens();

359

public Optional<Integer> cachedContentTokenCount();

360

public Optional<HttpResponse> sdkHttpResponse();

361

}

362

```

363

364

### Compute Tokens Config

365

366

```java { .api }

367

package com.google.genai.types;

368

369

public final class ComputeTokensConfig {

370

public static Builder builder();

371

372

public Optional<GenerateContentConfig> generateContentConfig();

373

public Optional<HttpOptions> httpOptions();

374

}

375

```

376

377

### Compute Tokens Response

378

379

```java { .api }

380

package com.google.genai.types;

381

382

public final class ComputeTokensResponse {

383

public Optional<List<ComputeTokensResult>> tokensInfo();

384

public Optional<HttpResponse> sdkHttpResponse();

385

}

386

```

387

388

### Compute Tokens Result

> NOTE(review): this type appears to be named `TokensInfo` in the SDK; as written it collides with the differently-shaped `ComputeTokensResult` returned by `LocalTokenizer` later in this document — confirm the class name against the SDK reference.

389

390

```java { .api }

391

package com.google.genai.types;

392

393

public final class ComputeTokensResult {

394

public Optional<List<Integer>> tokenIds();

395

public Optional<List<String>> tokens();

396

public Optional<String> role();

397

}

398

```

399

400

### Basic Token Counting

401

402

```java

403

import com.google.genai.types.CountTokensResponse;

404

405

CountTokensResponse response = client.models.countTokens(

406

"gemini-2.0-flash",

407

"What is your name?",

408

null

409

);

410

411

System.out.println("Total tokens: " + response.totalTokens().orElse(0));

412

```

413

414

### Count Tokens with Content

415

416

```java

417

import com.google.genai.types.Content;

418

import com.google.genai.types.Part;

419

import com.google.common.collect.ImmutableList;

420

421

List<Content> contents = ImmutableList.of(

422

Content.builder()

423

.role("user")

424

.parts(ImmutableList.of(Part.fromText("Hello, how are you?")))

425

.build(),

426

Content.builder()

427

.role("model")

428

.parts(ImmutableList.of(Part.fromText("I'm doing well, thank you!")))

429

.build(),

430

Content.builder()

431

.role("user")

432

.parts(ImmutableList.of(Part.fromText("Tell me about AI")))

433

.build()

434

);

435

436

CountTokensResponse response = client.models.countTokens(

437

"gemini-2.0-flash",

438

contents,

439

null

440

);

441

442

System.out.println("Total tokens in conversation: " + response.totalTokens().orElse(0));

443

```

444

445

### Count Tokens with Generation Config

446

447

Pass a `CountTokensConfig` wrapping a `GenerateContentConfig` to include system instructions and other generation settings in the token count:

448

449

```java

450

import com.google.genai.types.CountTokensConfig;

451

import com.google.genai.types.GenerateContentConfig;

452

453

GenerateContentConfig genConfig = GenerateContentConfig.builder()

454

.systemInstruction(Content.fromParts(

455

Part.fromText("You are a helpful assistant.")

456

))

457

.build();

458

459

CountTokensConfig config = CountTokensConfig.builder()

460

.generateContentConfig(genConfig)

461

.build();

462

463

CountTokensResponse response = client.models.countTokens(

464

"gemini-2.0-flash",

465

"Tell me about AI",

466

config

467

);

468

469

System.out.println("Total tokens (including system instruction): " +

470

response.totalTokens().orElse(0));

471

```

472

473

### Compute Tokens (Vertex AI Only)

474

475

Compute tokens returns detailed token IDs and strings:

476

477

```java

478

import com.google.genai.types.ComputeTokensResponse;

479

480

Client client = Client.builder()

481

.vertexAI(true)

482

.project("your-project")

483

.location("us-central1")

484

.build();

485

486

ComputeTokensResponse response = client.models.computeTokens(

487

"gemini-2.0-flash",

488

"What is your name?",

489

null

490

);

491

492

response.tokensInfo().ifPresent(tokensInfo -> {

493

for (ComputeTokensResult result : tokensInfo) {

494

System.out.println("Role: " + result.role().orElse("N/A"));

495

496

result.tokenIds().ifPresent(ids -> {

497

System.out.println("Token IDs: " + ids);

498

});

499

500

result.tokens().ifPresent(tokens -> {

501

System.out.println("Tokens: " + tokens);

502

});

503

}

504

});

505

```

506

507

### Async Token Operations

508

509

```java

510

import java.util.concurrent.CompletableFuture;

511

512

CompletableFuture<CountTokensResponse> future = client.async.models.countTokens(

513

"gemini-2.0-flash",

514

"Count tokens for this text",

515

null

516

);

517

518

future.thenAccept(response -> {

519

System.out.println("Token count: " + response.totalTokens().orElse(0));

520

});

521

```

522

523

524

## Local Tokenizer (Experimental)

525

526

**NOTE:** Local tokenizer is experimental and only supports text-based tokenization (no multimodal).

527

528

`LocalTokenizer` counts tokens locally, without making API calls or consuming API quota — useful for quota management, cost estimation, and pre-flight prompt-size checks.

529

530

### LocalTokenizer Class

531

532

```java { .api }

533

package com.google.genai;

534

535

public final class LocalTokenizer {

536

// Constructor

537

public LocalTokenizer(String modelName);

538

539

// Count tokens

540

public CountTokensResult countTokens(List<Content> contents, CountTokensConfig config);

541

public CountTokensResult countTokens(List<Content> contents);

542

public CountTokensResult countTokens(Content content, CountTokensConfig config);

543

public CountTokensResult countTokens(Content content);

544

public CountTokensResult countTokens(String content, CountTokensConfig config);

545

public CountTokensResult countTokens(String content);

546

547

// Compute tokens (detailed)

548

public ComputeTokensResult computeTokens(List<Content> contents);

549

public ComputeTokensResult computeTokens(Content content);

550

public ComputeTokensResult computeTokens(String content);

551

}

552

```

553

554

### Count Tokens Result

555

556

```java { .api }

557

package com.google.genai.types;

558

559

public final class CountTokensResult {

560

public Optional<Integer> totalTokens();

561

}

562

```

563

564

### Compute Tokens Result

565

566

```java { .api }

567

package com.google.genai.types;

568

569

public final class ComputeTokensResult {

570

public Optional<Integer> totalTokens();

571

public Optional<List<TokensInfo>> tokensInfo();

572

}

573

```

574

575

### Tokens Info

576

577

```java { .api }

578

package com.google.genai.types;

579

580

public final class TokensInfo {

581

public Optional<String> role();

582

public Optional<List<Integer>> tokenIds();

583

}

584

```

585

586

### Basic Local Token Counting

587

588

```java

589

import com.google.genai.LocalTokenizer;

590

import com.google.genai.types.CountTokensResult;

591

592

// Create local tokenizer

593

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

594

595

// Count tokens for simple text

596

String text = "This is a sample text to count tokens for.";

597

CountTokensResult result = tokenizer.countTokens(text);

598

599

int tokenCount = result.totalTokens().orElse(0);

600

System.out.println("Token count: " + tokenCount);

601

602

// No API call was made - completely free!

603

```

604

605

### Count Tokens for Content

606

607

```java

608

import com.google.genai.types.Content;

609

import com.google.genai.types.Part;

610

611

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

612

613

Content content = Content.fromParts(

614

Part.fromText("User message here")

615

);

616

617

CountTokensResult result = tokenizer.countTokens(content);

618

System.out.println("Tokens: " + result.totalTokens().orElse(0));

619

```

620

621

### Count Tokens for Conversation

622

623

```java

624

import java.util.List;

625

626

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

627

628

List<Content> conversation = List.of(

629

Content.builder()

630

.role("user")

631

.parts(List.of(Part.fromText("Hello!")))

632

.build(),

633

Content.builder()

634

.role("model")

635

.parts(List.of(Part.fromText("Hi there! How can I help you today?")))

636

.build(),

637

Content.builder()

638

.role("user")

639

.parts(List.of(Part.fromText("Tell me about AI.")))

640

.build()

641

);

642

643

CountTokensResult result = tokenizer.countTokens(conversation);

644

System.out.println("Conversation tokens: " + result.totalTokens().orElse(0));

645

```

646

647

### Count Tokens with Config

648

649

```java

650

import com.google.genai.types.CountTokensConfig;

651

import com.google.genai.types.GenerateContentConfig;

652

import com.google.genai.types.Tool;

653

654

// Create config with tools

655

GenerateContentConfig genConfig = GenerateContentConfig.builder()

656

.tools(tools)

657

.build();

658

659

CountTokensConfig config = CountTokensConfig.builder()

660

.generateContentConfig(genConfig)

661

.build();

662

663

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

664

CountTokensResult result = tokenizer.countTokens("Text with tools", config);

665

```

666

667

### Compute Tokens (Detailed)

668

669

```java

670

import com.google.genai.types.ComputeTokensResult;

671

import com.google.genai.types.TokensInfo;

672

673

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

674

675

ComputeTokensResult result = tokenizer.computeTokens("Sample text");

676

677

result.totalTokens().ifPresent(total -> {

678

System.out.println("Total tokens: " + total);

679

});

680

681

result.tokensInfo().ifPresent(tokensInfoList -> {

682

for (TokensInfo info : tokensInfoList) {

683

info.role().ifPresent(role -> {

684

System.out.println("Role: " + role);

685

});

686

687

info.tokenIds().ifPresent(ids -> {

688

System.out.println("Token IDs: " + ids);

689

});

690

}

691

});

692

```

693

694

### Compute Tokens for Conversation

695

696

```java

697

List<Content> conversation = List.of(

698

Content.builder().role("user").parts(List.of(Part.fromText("Hi"))).build(),

699

Content.builder().role("model").parts(List.of(Part.fromText("Hello!"))).build()

700

);

701

702

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

703

ComputeTokensResult result = tokenizer.computeTokens(conversation);

704

705

result.tokensInfo().ifPresent(tokensInfoList -> {

706

for (TokensInfo info : tokensInfoList) {

707

String role = info.role().orElse("unknown");

708

int tokenCount = info.tokenIds().map(List::size).orElse(0);

709

System.out.println(role + ": " + tokenCount + " tokens");

710

}

711

});

712

```

713

714

### Compare Local vs API Token Counting

715

716

```java

717

LocalTokenizer localTokenizer = new LocalTokenizer("gemini-2.0-flash");

718

String text = "Sample text for comparison";

719

720

// Local counting (free, instant)

721

CountTokensResult localResult = localTokenizer.countTokens(text);

722

int localCount = localResult.totalTokens().orElse(0);

723

724

// API counting (uses quota, requires network)

725

CountTokensResponse apiResult = client.models.countTokens(

726

"gemini-2.0-flash",

727

text,

728

null

729

);

730

int apiCount = apiResult.totalTokens().orElse(0);

731

732

System.out.println("Local count: " + localCount);

733

System.out.println("API count: " + apiCount);

734

// Counts should be very close or identical

735

```

736

737

### Pre-check Before API Call

738

739

```java

740

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

741

String longPrompt = /* very long text */;

742

743

// Check locally first (free)

744

CountTokensResult result = tokenizer.countTokens(longPrompt);

745

int tokenCount = result.totalTokens().orElse(0);

746

747

if (tokenCount > 30000) {

748

System.out.println("Prompt exceeds context window, truncating...");

749

// Truncate before making API call

750

} else {

751

// Safe to proceed with API call

752

GenerateContentResponse response = client.models.generateContent(

753

"gemini-2.0-flash",

754

longPrompt,

755

null

756

);

757

}

758

```

759

760

### Batch Token Estimation

761

762

```java

763

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

764

765

List<String> prompts = List.of(

766

"Prompt 1",

767

"Prompt 2",

768

"Prompt 3"

769

);

770

771

int totalTokens = 0;

772

for (String prompt : prompts) {

773

CountTokensResult result = tokenizer.countTokens(prompt);

774

int tokens = result.totalTokens().orElse(0);

775

totalTokens += tokens;

776

System.out.println("Prompt: " + tokens + " tokens");

777

}

778

779

System.out.println("Total tokens for batch: " + totalTokens);

780

// Estimate cost before making batch API calls

781

```

782

783

### Limitations

784

785

**Text Only:** LocalTokenizer only supports text-based tokenization. It does not handle:

786

- Images

787

- Audio

788

- Video

789

- Other multimodal content

790

791

**Model Support:** Limited to models with available tokenizer models. Check documentation for supported models.

792

793

**Accuracy:** Token counts may differ slightly from API counts for edge cases, but should be very close for typical use.

794

795

```java

796

// This works

797

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

798

tokenizer.countTokens("Text content"); // ✓

799

800

// This is not supported

801

Content multimodal = Content.fromParts(

802

Part.fromText("Text"),

803

Part.fromImage(image) // LocalTokenizer cannot count image tokens

804

);

805

// tokenizer.countTokens(multimodal); // Will only count text part

806

```

807

808

## Token Management Best Practices

809

810

### Check Before Sending

811

812

```java

813

// Count tokens before making expensive API call

814

CountTokensResponse countResponse = client.models.countTokens(

815

"gemini-2.0-flash",

816

longPrompt,

817

null

818

);

819

820

int tokenCount = countResponse.totalTokens().orElse(0);

821

822

if (tokenCount > 30000) {

823

System.out.println("Prompt too long, truncating...");

824

// Truncate or split prompt

825

} else {

826

// Proceed with generation

827

GenerateContentResponse response = client.models.generateContent(

828

"gemini-2.0-flash",

829

longPrompt,

830

null

831

);

832

}

833

```

834

835

### Budget Management

836

837

```java

838

import com.google.genai.types.GenerateContentConfig;

839

840

// Set max output tokens to control costs

841

GenerateContentConfig config = GenerateContentConfig.builder()

842

.maxOutputTokens(500)

843

.build();

844

845

GenerateContentResponse response = client.models.generateContent(

846

"gemini-2.0-flash",

847

"Write a summary",

848

config

849

);

850

851

// Check actual usage

852

response.usageMetadata().ifPresent(usage -> {

853

System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));

854

System.out.println("Response tokens: " + usage.candidatesTokenCount().orElse(0));

855

System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));

856

});

857

```

858

859

### Cached Content Token Savings

860

861

```java

862

// When using cached content, check token savings

863

CountTokensResponse response = client.models.countTokens(

864

"gemini-2.0-flash",

865

"Query against cached content",

866

null

867

);

868

869

response.totalTokens().ifPresent(total -> {

870

response.cachedContentTokenCount().ifPresent(cached -> {

871

int uncachedTokens = total - cached;

872

System.out.println("Total tokens: " + total);

873

System.out.println("Cached tokens: " + cached);

874

System.out.println("Uncached tokens: " + uncachedTokens);

875

System.out.println("Token savings: " + (cached * 100.0 / total) + "%");

876

});

877

});

878

```

879