or run

npx @tessl/cli init
Log in

Version

Title

Overview

Evals

Files

Files

docs

batch-operations.md, caching.md, chat-sessions.md, client-configuration.md, content-generation.md, embeddings-tokens.md, error-handling.md, file-search-stores.md, files-management.md, image-operations.md, index.md, live-sessions.md, model-tuning.md, operations.md, tools-functions.md, types-reference.md, video-generation.md

docs/embeddings-tokens.md

0

# Embeddings and Token Operations

1

2

Generate embeddings for text and manage token counting and computation for prompt optimization.

3

4

## Core Imports

5

6

```java

7

import com.google.genai.Models;

8

import com.google.genai.AsyncModels;

9

import com.google.genai.LocalTokenizer;

10

import com.google.genai.types.EmbedContentResponse;

11

import com.google.genai.types.EmbedContentConfig;

12

import com.google.genai.types.ContentEmbedding;

13

import com.google.genai.types.CountTokensResponse;

14

import com.google.genai.types.CountTokensConfig;

15

import com.google.genai.types.ComputeTokensResponse;

16

import com.google.genai.types.ComputeTokensConfig;

17

import java.util.concurrent.CompletableFuture;

18

```

19

20

## Embeddings

21

22

### Models Service - Embeddings

23

24

```java { .api }

25

package com.google.genai;

26

27

public final class Models {

28

// Single text embedding

29

public EmbedContentResponse embedContent(

30

String model,

31

String text,

32

EmbedContentConfig config);

33

34

// Multiple texts embedding

35

public EmbedContentResponse embedContent(

36

String model,

37

List<String> texts,

38

EmbedContentConfig config);

39

}

40

```

41

42

### Async Models Service - Embeddings

43

44

```java { .api }

45

package com.google.genai;

46

47

public final class AsyncModels {

48

public CompletableFuture<EmbedContentResponse> embedContent(

49

String model,

50

String text,

51

EmbedContentConfig config);

52

53

public CompletableFuture<EmbedContentResponse> embedContent(

54

String model,

55

List<String> texts,

56

EmbedContentConfig config);

57

}

58

```

59

60

### Embed Content Config

61

62

```java { .api }

63

package com.google.genai.types;

64

65

public final class EmbedContentConfig {

66

public static Builder builder();

67

68

public Optional<String> taskType();

69

public Optional<String> title();

70

public Optional<Integer> outputDimensionality();

71

public Optional<String> mimeType();

72

public Optional<Boolean> autoTruncate();

73

public Optional<HttpOptions> httpOptions();

74

}

75

```

76

77

**Task Types:**

78

- `RETRIEVAL_QUERY` - For search queries

79

- `RETRIEVAL_DOCUMENT` - For documents to be searched

80

- `SEMANTIC_SIMILARITY` - For similarity comparison

81

- `CLASSIFICATION` - For text classification

82

- `CLUSTERING` - For text clustering

83

84

### Embed Content Response

85

86

```java { .api }

87

package com.google.genai.types;

88

89

public final class EmbedContentResponse {

90

public Optional<ContentEmbedding> embedding();

91

public Optional<List<ContentEmbedding>> embeddings();

92

public Optional<HttpResponse> sdkHttpResponse();

93

}

94

```

95

96

### Content Embedding

97

98

```java { .api }

99

package com.google.genai.types;

100

101

public final class ContentEmbedding {

102

public Optional<List<Float>> values();

103

public Optional<ContentEmbeddingStatistics> statistics();

104

}

105

```

106

107

### Content Embedding Statistics

108

109

```java { .api }

110

package com.google.genai.types;

111

112

public final class ContentEmbeddingStatistics {

113

public Optional<Integer> tokenCount();

114

public Optional<Boolean> truncated();

115

}

116

```

117

118

### Basic Embedding Usage

119

120

```java

121

import com.google.genai.Client;

122

import com.google.genai.types.EmbedContentResponse;

123

124

Client client = new Client();

125

126

// Single text embedding

127

EmbedContentResponse response = client.models.embedContent(

128

"text-embedding-004",

129

"Why is the sky blue?",

130

null

131

);

132

133

// Access embedding values

134

response.embedding().ifPresent(embedding -> {

135

embedding.values().ifPresent(values -> {

136

System.out.println("Embedding dimension: " + values.size());

137

System.out.println("First few values: " + values.subList(0, 5));

138

});

139

});

140

```

141

142

### Multiple Text Embeddings

143

144

```java

145

import com.google.common.collect.ImmutableList;

146

import com.google.genai.types.ContentEmbedding;

147

148

List<String> texts = ImmutableList.of(

149

"What is machine learning?",

150

"How does AI work?",

151

"Explain neural networks"

152

);

153

154

EmbedContentResponse response = client.models.embedContent(

155

"text-embedding-004",

156

texts,

157

null

158

);

159

160

// Access multiple embeddings

161

response.embeddings().ifPresent(embeddings -> {

162

System.out.println("Generated " + embeddings.size() + " embeddings");

163

for (int i = 0; i < embeddings.size(); i++) {

164

ContentEmbedding emb = embeddings.get(i);

165

System.out.println("Text " + (i + 1) + " embedding dimension: " +

166

emb.values().map(List::size).orElse(0));

167

}

168

});

169

```

170

171

### Embedding with Configuration

172

173

```java

174

import com.google.genai.types.EmbedContentConfig;

175

176

EmbedContentConfig config = EmbedContentConfig.builder()

177

.taskType("RETRIEVAL_DOCUMENT")

178

.title("Document about AI")

179

.outputDimensionality(256) // Reduce dimensionality

180

.autoTruncate(true)

181

.build();

182

183

EmbedContentResponse response = client.models.embedContent(

184

"text-embedding-004",

185

"This is a long document about artificial intelligence...",

186

config

187

);

188

189

// Check if truncated

190

response.embedding().ifPresent(embedding -> {

191

embedding.statistics().ifPresent(stats -> {

192

if (stats.truncated().orElse(false)) {

193

System.out.println("Input was truncated");

194

}

195

System.out.println("Token count: " + stats.tokenCount().orElse(0));

196

});

197

});

198

```

199

200

### Embeddings for Search

201

202

```java

203

import com.google.genai.types.EmbedContentConfig;

204

205

// Embed query

206

EmbedContentConfig queryConfig = EmbedContentConfig.builder()

207

.taskType("RETRIEVAL_QUERY")

208

.build();

209

210

EmbedContentResponse queryResponse = client.models.embedContent(

211

"text-embedding-004",

212

"What is the capital of France?",

213

queryConfig

214

);

215

216

// Embed documents

217

EmbedContentConfig docConfig = EmbedContentConfig.builder()

218

.taskType("RETRIEVAL_DOCUMENT")

219

.build();

220

221

List<String> documents = ImmutableList.of(

222

"Paris is the capital and largest city of France.",

223

"London is the capital city of England.",

224

"Berlin is the capital and largest city of Germany."

225

);

226

227

EmbedContentResponse docsResponse = client.models.embedContent(

228

"text-embedding-004",

229

documents,

230

docConfig

231

);

232

233

// Now compute similarity between query and documents

234

List<Float> queryEmbedding = queryResponse.embedding()

235

.flatMap(ContentEmbedding::values)

236

.orElse(ImmutableList.of());

237

238

docsResponse.embeddings().ifPresent(docEmbeddings -> {

239

for (int i = 0; i < docEmbeddings.size(); i++) {

240

List<Float> docEmbedding = docEmbeddings.get(i).values().orElse(ImmutableList.of());

241

double similarity = cosineSimilarity(queryEmbedding, docEmbedding);

242

System.out.println("Document " + (i + 1) + " similarity: " + similarity);

243

}

244

});

245

246

// Helper method for cosine similarity

247

private static double cosineSimilarity(List<Float> a, List<Float> b) {

248

double dotProduct = 0.0;

249

double normA = 0.0;

250

double normB = 0.0;

251

for (int i = 0; i < a.size(); i++) {

252

dotProduct += a.get(i) * b.get(i);

253

normA += a.get(i) * a.get(i);

254

normB += b.get(i) * b.get(i);

255

}

256

return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));

257

}

258

```

259

260

### Async Embeddings

261

262

```java

263

import java.util.concurrent.CompletableFuture;

264

265

CompletableFuture<EmbedContentResponse> future = client.async.models.embedContent(

266

"text-embedding-004",

267

"Async embedding text",

268

null

269

);

270

271

future.thenAccept(response -> {

272

response.embedding().ifPresent(embedding -> {

273

System.out.println("Embedding size: " +

274

embedding.values().map(List::size).orElse(0));

275

});

276

});

277

```

278

279

## Token Operations

280

281

### Models Service - Token Operations

282

283

```java { .api }

284

package com.google.genai;

285

286

public final class Models {

287

// Count tokens

288

public CountTokensResponse countTokens(

289

String model,

290

String text,

291

CountTokensConfig config);

292

293

public CountTokensResponse countTokens(

294

String model,

295

List<Content> contents,

296

CountTokensConfig config);

297

298

// Compute tokens (Vertex AI only)

299

public ComputeTokensResponse computeTokens(

300

String model,

301

String text,

302

ComputeTokensConfig config);

303

304

public ComputeTokensResponse computeTokens(

305

String model,

306

List<Content> contents,

307

ComputeTokensConfig config);

308

}

309

```

310

311

### Async Models Service - Token Operations

312

313

```java { .api }

314

package com.google.genai;

315

316

public final class AsyncModels {

317

public CompletableFuture<CountTokensResponse> countTokens(

318

String model,

319

String text,

320

CountTokensConfig config);

321

322

public CompletableFuture<CountTokensResponse> countTokens(

323

String model,

324

List<Content> contents,

325

CountTokensConfig config);

326

327

public CompletableFuture<ComputeTokensResponse> computeTokens(

328

String model,

329

String text,

330

ComputeTokensConfig config);

331

332

public CompletableFuture<ComputeTokensResponse> computeTokens(

333

String model,

334

List<Content> contents,

335

ComputeTokensConfig config);

336

}

337

```

338

339

### Count Tokens Config

340

341

```java { .api }

342

package com.google.genai.types;

343

344

public final class CountTokensConfig {

345

public static Builder builder();

346

347

public Optional<GenerateContentConfig> generateContentConfig();

348

public Optional<HttpOptions> httpOptions();

349

}

350

```

351

352

### Count Tokens Response

353

354

```java { .api }

355

package com.google.genai.types;

356

357

public final class CountTokensResponse {

358

public Optional<Integer> totalTokens();

359

public Optional<Integer> cachedContentTokenCount();

360

public Optional<HttpResponse> sdkHttpResponse();

361

}

362

```

363

364

### Compute Tokens Config

365

366

```java { .api }

367

package com.google.genai.types;

368

369

public final class ComputeTokensConfig {

370

public static Builder builder();

371

372

public Optional<GenerateContentConfig> generateContentConfig();

373

public Optional<HttpOptions> httpOptions();

374

}

375

```

376

377

### Compute Tokens Response

378

379

```java { .api }

380

package com.google.genai.types;

381

382

public final class ComputeTokensResponse {

383

public Optional<List<ComputeTokensResult>> tokensInfo();

384

public Optional<HttpResponse> sdkHttpResponse();

385

}

386

```

387

388

### Compute Tokens Result

> NOTE(review): this type appears to be named `TokensInfo` in the SDK; as written it collides with the differently-shaped `ComputeTokensResult` returned by `LocalTokenizer` later in this document — confirm the class name against the SDK reference.

389

390

```java { .api }

391

package com.google.genai.types;

392

393

public final class ComputeTokensResult {

394

public Optional<List<Integer>> tokenIds();

395

public Optional<List<String>> tokens();

396

public Optional<String> role();

397

}

398

```

399

400

### Basic Token Counting

401

402

```java

403

import com.google.genai.types.CountTokensResponse;

404

405

CountTokensResponse response = client.models.countTokens(

406

"gemini-2.0-flash",

407

"What is your name?",

408

null

409

);

410

411

System.out.println("Total tokens: " + response.totalTokens().orElse(0));

412

```

413

414

### Count Tokens with Content

415

416

```java

417

import com.google.genai.types.Content;

418

import com.google.genai.types.Part;

419

import com.google.common.collect.ImmutableList;

420

421

List<Content> contents = ImmutableList.of(

422

Content.builder()

423

.role("user")

424

.parts(ImmutableList.of(Part.fromText("Hello, how are you?")))

425

.build(),

426

Content.builder()

427

.role("model")

428

.parts(ImmutableList.of(Part.fromText("I'm doing well, thank you!")))

429

.build(),

430

Content.builder()

431

.role("user")

432

.parts(ImmutableList.of(Part.fromText("Tell me about AI")))

433

.build()

434

);

435

436

CountTokensResponse response = client.models.countTokens(

437

"gemini-2.0-flash",

438

contents,

439

null

440

);

441

442

System.out.println("Total tokens in conversation: " + response.totalTokens().orElse(0));

443

```

444

445

### Count Tokens with Generation Config

446

447

Pass a `CountTokensConfig` wrapping a `GenerateContentConfig` to include system instructions and other generation settings in the token count:

448

449

```java

450

import com.google.genai.types.CountTokensConfig;

451

import com.google.genai.types.GenerateContentConfig;

452

453

GenerateContentConfig genConfig = GenerateContentConfig.builder()

454

.systemInstruction(Content.fromParts(

455

Part.fromText("You are a helpful assistant.")

456

))

457

.build();

458

459

CountTokensConfig config = CountTokensConfig.builder()

460

.generateContentConfig(genConfig)

461

.build();

462

463

CountTokensResponse response = client.models.countTokens(

464

"gemini-2.0-flash",

465

"Tell me about AI",

466

config

467

);

468

469

System.out.println("Total tokens (including system instruction): " +

470

response.totalTokens().orElse(0));

471

```

472

473

### Compute Tokens (Vertex AI Only)

474

475

Compute tokens returns detailed token IDs and strings:

476

477

```java

478

import com.google.genai.types.ComputeTokensResponse;

479

480

Client client = Client.builder()

481

.vertexAI(true)

482

.project("your-project")

483

.location("us-central1")

484

.build();

485

486

ComputeTokensResponse response = client.models.computeTokens(

487

"gemini-2.0-flash",

488

"What is your name?",

489

null

490

);

491

492

response.tokensInfo().ifPresent(tokensInfo -> {

493

for (ComputeTokensResult result : tokensInfo) {

494

System.out.println("Role: " + result.role().orElse("N/A"));

495

496

result.tokenIds().ifPresent(ids -> {

497

System.out.println("Token IDs: " + ids);

498

});

499

500

result.tokens().ifPresent(tokens -> {

501

System.out.println("Tokens: " + tokens);

502

});

503

}

504

});

505

```

506

507

### Async Token Operations

508

509

```java

510

import java.util.concurrent.CompletableFuture;

511

512

CompletableFuture<CountTokensResponse> future = client.async.models.countTokens(

513

"gemini-2.0-flash",

514

"Count tokens for this text",

515

null

516

);

517

518

future.thenAccept(response -> {

519

System.out.println("Token count: " + response.totalTokens().orElse(0));

520

});

521

```

522

523

524

## Local Tokenizer (Experimental)

525

526

**NOTE:** Local tokenizer is experimental and only supports text-based tokenization (no multimodal).

527

528

`LocalTokenizer` counts tokens locally, without making API calls or consuming API quota — useful for quota management, cost estimation, and pre-flight prompt-size checks.

529

530

### LocalTokenizer Class

531

532

```java { .api }

533

package com.google.genai;

534

535

public final class LocalTokenizer {

536

// Constructor

537

public LocalTokenizer(String modelName);

538

539

// Count tokens

540

public CountTokensResult countTokens(List<Content> contents, CountTokensConfig config);

541

public CountTokensResult countTokens(List<Content> contents);

542

public CountTokensResult countTokens(Content content, CountTokensConfig config);

543

public CountTokensResult countTokens(Content content);

544

public CountTokensResult countTokens(String content, CountTokensConfig config);

545

public CountTokensResult countTokens(String content);

546

547

// Compute tokens (detailed)

548

public ComputeTokensResult computeTokens(List<Content> contents);

549

public ComputeTokensResult computeTokens(Content content);

550

public ComputeTokensResult computeTokens(String content);

551

}

552

```

553

554

### Count Tokens Result

555

556

```java { .api }

557

package com.google.genai.types;

558

559

public final class CountTokensResult {

560

public Optional<Integer> totalTokens();

561

}

562

```

563

564

### Compute Tokens Result

565

566

```java { .api }

567

package com.google.genai.types;

568

569

public final class ComputeTokensResult {

570

public Optional<Integer> totalTokens();

571

public Optional<List<TokensInfo>> tokensInfo();

572

}

573

```

574

575

### Tokens Info

576

577

```java { .api }

578

package com.google.genai.types;

579

580

public final class TokensInfo {

581

public Optional<String> role();

582

public Optional<List<Integer>> tokenIds();

583

}

584

```

585

586

### Basic Local Token Counting

587

588

```java

589

import com.google.genai.LocalTokenizer;

590

import com.google.genai.types.CountTokensResult;

591

592

// Create local tokenizer

593

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

594

595

// Count tokens for simple text

596

String text = "This is a sample text to count tokens for.";

597

CountTokensResult result = tokenizer.countTokens(text);

598

599

int tokenCount = result.totalTokens().orElse(0);

600

System.out.println("Token count: " + tokenCount);

601

602

// No API call was made - completely free!

603

```

604

605

### Count Tokens for Content

606

607

```java

608

import com.google.genai.types.Content;

609

import com.google.genai.types.Part;

610

611

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

612

613

Content content = Content.fromParts(

614

Part.fromText("User message here")

615

);

616

617

CountTokensResult result = tokenizer.countTokens(content);

618

System.out.println("Tokens: " + result.totalTokens().orElse(0));

619

```

620

621

### Count Tokens for Conversation

622

623

```java

624

import java.util.List;

625

626

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

627

628

List<Content> conversation = List.of(

629

Content.builder()

630

.role("user")

631

.parts(List.of(Part.fromText("Hello!")))

632

.build(),

633

Content.builder()

634

.role("model")

635

.parts(List.of(Part.fromText("Hi there! How can I help you today?")))

636

.build(),

637

Content.builder()

638

.role("user")

639

.parts(List.of(Part.fromText("Tell me about AI.")))

640

.build()

641

);

642

643

CountTokensResult result = tokenizer.countTokens(conversation);

644

System.out.println("Conversation tokens: " + result.totalTokens().orElse(0));

645

```

646

647

### Count Tokens with Config

648

649

```java

650

import com.google.genai.types.CountTokensConfig;

651

import com.google.genai.types.GenerateContentConfig;

652

import com.google.genai.types.Tool;

653

654

// Create config with tools

655

GenerateContentConfig genConfig = GenerateContentConfig.builder()

656

.tools(tools)

657

.build();

658

659

CountTokensConfig config = CountTokensConfig.builder()

660

.generateContentConfig(genConfig)

661

.build();

662

663

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

664

CountTokensResult result = tokenizer.countTokens("Text with tools", config);

665

```

666

667

### Compute Tokens (Detailed)

668

669

```java

670

import com.google.genai.types.ComputeTokensResult;

671

import com.google.genai.types.TokensInfo;

672

673

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

674

675

ComputeTokensResult result = tokenizer.computeTokens("Sample text");

676

677

result.totalTokens().ifPresent(total -> {

678

System.out.println("Total tokens: " + total);

679

});

680

681

result.tokensInfo().ifPresent(tokensInfoList -> {

682

for (TokensInfo info : tokensInfoList) {

683

info.role().ifPresent(role -> {

684

System.out.println("Role: " + role);

685

});

686

687

info.tokenIds().ifPresent(ids -> {

688

System.out.println("Token IDs: " + ids);

689

});

690

}

691

});

692

```

693

694

### Compute Tokens for Conversation

695

696

```java

697

List<Content> conversation = List.of(

698

Content.builder().role("user").parts(List.of(Part.fromText("Hi"))).build(),

699

Content.builder().role("model").parts(List.of(Part.fromText("Hello!"))).build()

700

);

701

702

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

703

ComputeTokensResult result = tokenizer.computeTokens(conversation);

704

705

result.tokensInfo().ifPresent(tokensInfoList -> {

706

for (TokensInfo info : tokensInfoList) {

707

String role = info.role().orElse("unknown");

708

int tokenCount = info.tokenIds().map(List::size).orElse(0);

709

System.out.println(role + ": " + tokenCount + " tokens");

710

}

711

});

712

```

713

714

### Compare Local vs API Token Counting

715

716

```java

717

LocalTokenizer localTokenizer = new LocalTokenizer("gemini-2.0-flash");

718

String text = "Sample text for comparison";

719

720

// Local counting (free, instant)

721

CountTokensResult localResult = localTokenizer.countTokens(text);

722

int localCount = localResult.totalTokens().orElse(0);

723

724

// API counting (uses quota, requires network)

725

CountTokensResponse apiResult = client.models.countTokens(

726

"gemini-2.0-flash",

727

text,

728

null

729

);

730

int apiCount = apiResult.totalTokens().orElse(0);

731

732

System.out.println("Local count: " + localCount);

733

System.out.println("API count: " + apiCount);

734

// Counts should be very close or identical

735

```

736

737

### Pre-check Before API Call

738

739

```java

740

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

741

String longPrompt = /* very long text */;

742

743

// Check locally first (free)

744

CountTokensResult result = tokenizer.countTokens(longPrompt);

745

int tokenCount = result.totalTokens().orElse(0);

746

747

if (tokenCount > 30000) {

748

System.out.println("Prompt exceeds context window, truncating...");

749

// Truncate before making API call

750

} else {

751

// Safe to proceed with API call

752

GenerateContentResponse response = client.models.generateContent(

753

"gemini-2.0-flash",

754

longPrompt,

755

null

756

);

757

}

758

```

759

760

### Batch Token Estimation

761

762

```java

763

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

764

765

List<String> prompts = List.of(

766

"Prompt 1",

767

"Prompt 2",

768

"Prompt 3"

769

);

770

771

int totalTokens = 0;

772

for (String prompt : prompts) {

773

CountTokensResult result = tokenizer.countTokens(prompt);

774

int tokens = result.totalTokens().orElse(0);

775

totalTokens += tokens;

776

System.out.println("Prompt: " + tokens + " tokens");

777

}

778

779

System.out.println("Total tokens for batch: " + totalTokens);

780

// Estimate cost before making batch API calls

781

```

782

783

### Limitations

784

785

**Text Only:** LocalTokenizer only supports text-based tokenization. It does not handle:

786

- Images

787

- Audio

788

- Video

789

- Other multimodal content

790

791

**Model Support:** Limited to models with available tokenizer models. Check documentation for supported models.

792

793

**Accuracy:** Token counts may differ slightly from API counts for edge cases, but should be very close for typical use.

794

795

```java

796

// This works

797

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

798

tokenizer.countTokens("Text content"); // ✓

799

800

// This is not supported

801

Content multimodal = Content.fromParts(

802

Part.fromText("Text"),

803

Part.fromImage(image) // LocalTokenizer cannot count image tokens

804

);

805

// tokenizer.countTokens(multimodal); // Will only count text part

806

```

807

808

## Token Management Best Practices

809

810

### Check Before Sending

811

812

```java

813

// Count tokens before making expensive API call

814

CountTokensResponse countResponse = client.models.countTokens(

815

"gemini-2.0-flash",

816

longPrompt,

817

null

818

);

819

820

int tokenCount = countResponse.totalTokens().orElse(0);

821

822

if (tokenCount > 30000) {

823

System.out.println("Prompt too long, truncating...");

824

// Truncate or split prompt

825

} else {

826

// Proceed with generation

827

GenerateContentResponse response = client.models.generateContent(

828

"gemini-2.0-flash",

829

longPrompt,

830

null

831

);

832

}

833

```

834

835

### Budget Management

836

837

```java

838

import com.google.genai.types.GenerateContentConfig;

839

840

// Set max output tokens to control costs

841

GenerateContentConfig config = GenerateContentConfig.builder()

842

.maxOutputTokens(500)

843

.build();

844

845

GenerateContentResponse response = client.models.generateContent(

846

"gemini-2.0-flash",

847

"Write a summary",

848

config

849

);

850

851

// Check actual usage

852

response.usageMetadata().ifPresent(usage -> {

853

System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));

854

System.out.println("Response tokens: " + usage.candidatesTokenCount().orElse(0));

855

System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));

856

});

857

```

858

859

### Cached Content Token Savings

860

861

```java

862

// When using cached content, check token savings

863

CountTokensResponse response = client.models.countTokens(

864

"gemini-2.0-flash",

865

"Query against cached content",

866

null

867

);

868

869

response.totalTokens().ifPresent(total -> {

870

response.cachedContentTokenCount().ifPresent(cached -> {

871

int uncachedTokens = total - cached;

872

System.out.println("Total tokens: " + total);

873

System.out.println("Cached tokens: " + cached);

874

System.out.println("Uncached tokens: " + uncachedTokens);

875

System.out.println("Token savings: " + (cached * 100.0 / total) + "%");

876

});

877

});

878

```

879