# Embeddings and Token Operations

Generate embeddings for text and manage token counting and computation for prompt optimization.

## Core Imports

```java
import com.google.genai.Models;
import com.google.genai.AsyncModels;
import com.google.genai.LocalTokenizer;
import com.google.genai.types.EmbedContentResponse;
import com.google.genai.types.EmbedContentConfig;
import com.google.genai.types.ContentEmbedding;
import com.google.genai.types.CountTokensResponse;
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.ComputeTokensResponse;
import com.google.genai.types.ComputeTokensConfig;
import java.util.concurrent.CompletableFuture;
```

## Embeddings

### Models Service - Embeddings

```java { .api }
package com.google.genai;

public final class Models {
  // Single text embedding
  public EmbedContentResponse embedContent(
      String model,
      String text,
      EmbedContentConfig config);

  // Multiple texts embedding
  public EmbedContentResponse embedContent(
      String model,
      List<String> texts,
      EmbedContentConfig config);
}
```

### Async Models Service - Embeddings

```java { .api }
package com.google.genai;

public final class AsyncModels {
  public CompletableFuture<EmbedContentResponse> embedContent(
      String model,
      String text,
      EmbedContentConfig config);

  public CompletableFuture<EmbedContentResponse> embedContent(
      String model,
      List<String> texts,
      EmbedContentConfig config);
}
```

### Embed Content Config

```java { .api }
package com.google.genai.types;

public final class EmbedContentConfig {
  public static Builder builder();

  public Optional<String> taskType();
  public Optional<String> title();
  public Optional<Integer> outputDimensionality();
  public Optional<String> mimeType();
  public Optional<Boolean> autoTruncate();
  public Optional<HttpOptions> httpOptions();
}
```

**Task Types:**
- `RETRIEVAL_QUERY` - For search queries
- `RETRIEVAL_DOCUMENT` - For documents to be searched
- `SEMANTIC_SIMILARITY` - For similarity comparison
- `CLASSIFICATION` - For text classification
- `CLUSTERING` - For text clustering

### Embed Content Response

```java { .api }
package com.google.genai.types;

public final class EmbedContentResponse {
  public Optional<ContentEmbedding> embedding();
  public Optional<List<ContentEmbedding>> embeddings();
  public Optional<HttpResponse> sdkHttpResponse();
}
```

### Content Embedding

```java { .api }
package com.google.genai.types;

public final class ContentEmbedding {
  public Optional<List<Float>> values();
  public Optional<ContentEmbeddingStatistics> statistics();
}
```

### Content Embedding Statistics

```java { .api }
package com.google.genai.types;

public final class ContentEmbeddingStatistics {
  public Optional<Integer> tokenCount();
  public Optional<Boolean> truncated();
}
```

### Basic Embedding Usage

```java
import com.google.genai.Client;
import com.google.genai.types.EmbedContentResponse;

Client client = new Client();

// Single text embedding
EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    "Why is the sky blue?",
    null
);

// Access embedding values
response.embedding().ifPresent(embedding -> {
  embedding.values().ifPresent(values -> {
    System.out.println("Embedding dimension: " + values.size());
    System.out.println("First few values: " + values.subList(0, 5));
  });
});
```

### Multiple Text Embeddings

```java
import com.google.common.collect.ImmutableList;
import com.google.genai.types.ContentEmbedding;

List<String> texts = ImmutableList.of(
    "What is machine learning?",
    "How does AI work?",
    "Explain neural networks"
);

EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    texts,
    null
);

// Access multiple embeddings
response.embeddings().ifPresent(embeddings -> {
  System.out.println("Generated " + embeddings.size() + " embeddings");
  for (int i = 0; i < embeddings.size(); i++) {
    ContentEmbedding emb = embeddings.get(i);
    System.out.println("Text " + (i + 1) + " embedding dimension: " +
        emb.values().map(List::size).orElse(0));
  }
});
```

### Embedding with Configuration

```java
import com.google.genai.types.EmbedContentConfig;

EmbedContentConfig config = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_DOCUMENT")
    .title("Document about AI")
    .outputDimensionality(256) // Reduce dimensionality
    .autoTruncate(true)
    .build();

EmbedContentResponse response = client.models.embedContent(
    "text-embedding-004",
    "This is a long document about artificial intelligence...",
    config
);

// Check if truncated
response.embedding().ifPresent(embedding -> {
  embedding.statistics().ifPresent(stats -> {
    if (stats.truncated().orElse(false)) {
      System.out.println("Input was truncated");
    }
    System.out.println("Token count: " + stats.tokenCount().orElse(0));
  });
});
```

### Embeddings for Search

```java
import com.google.genai.types.EmbedContentConfig;

// Embed query
EmbedContentConfig queryConfig = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_QUERY")
    .build();

EmbedContentResponse queryResponse = client.models.embedContent(
    "text-embedding-004",
    "What is the capital of France?",
    queryConfig
);

// Embed documents
EmbedContentConfig docConfig = EmbedContentConfig.builder()
    .taskType("RETRIEVAL_DOCUMENT")
    .build();

List<String> documents = ImmutableList.of(
    "Paris is the capital and largest city of France.",
    "London is the capital city of England.",
    "Berlin is the capital and largest city of Germany."
);

EmbedContentResponse docsResponse = client.models.embedContent(
    "text-embedding-004",
    documents,
    docConfig
);

// Now compute similarity between query and documents
List<Float> queryEmbedding = queryResponse.embedding()
    .flatMap(ContentEmbedding::values)
    .orElse(ImmutableList.of());

docsResponse.embeddings().ifPresent(docEmbeddings -> {
  for (int i = 0; i < docEmbeddings.size(); i++) {
    List<Float> docEmbedding = docEmbeddings.get(i).values().orElse(ImmutableList.of());
    double similarity = cosineSimilarity(queryEmbedding, docEmbedding);
    System.out.println("Document " + (i + 1) + " similarity: " + similarity);
  }
});

// Helper method for cosine similarity
private static double cosineSimilarity(List<Float> a, List<Float> b) {
  double dotProduct = 0.0;
  double normA = 0.0;
  double normB = 0.0;
  for (int i = 0; i < a.size(); i++) {
    dotProduct += a.get(i) * b.get(i);
    normA += a.get(i) * a.get(i);
    normB += b.get(i) * b.get(i);
  }
  return dotProduct / (Math.sqrt(normA) * Math.sqrt(normB));
}
```

### Async Embeddings

```java
import java.util.concurrent.CompletableFuture;

CompletableFuture<EmbedContentResponse> future = client.async.models.embedContent(
    "text-embedding-004",
    "Async embedding text",
    null
);

future.thenAccept(response -> {
  response.embedding().ifPresent(embedding -> {
    System.out.println("Embedding size: " +
        embedding.values().map(List::size).orElse(0));
  });
});
```

## Token Operations

### Models Service - Token Operations

```java { .api }
package com.google.genai;

public final class Models {
  // Count tokens
  public CountTokensResponse countTokens(
      String model,
      String text,
      CountTokensConfig config);

  public CountTokensResponse countTokens(
      String model,
      List<Content> contents,
      CountTokensConfig config);

  // Compute tokens (Vertex AI only)
  public ComputeTokensResponse computeTokens(
      String model,
      String text,
      ComputeTokensConfig config);

  public ComputeTokensResponse computeTokens(
      String model,
      List<Content> contents,
      ComputeTokensConfig config);
}
```

### Async Models Service - Token Operations

```java { .api }
package com.google.genai;

public final class AsyncModels {
  public CompletableFuture<CountTokensResponse> countTokens(
      String model,
      String text,
      CountTokensConfig config);

  public CompletableFuture<CountTokensResponse> countTokens(
      String model,
      List<Content> contents,
      CountTokensConfig config);

  public CompletableFuture<ComputeTokensResponse> computeTokens(
      String model,
      String text,
      ComputeTokensConfig config);

  public CompletableFuture<ComputeTokensResponse> computeTokens(
      String model,
      List<Content> contents,
      ComputeTokensConfig config);
}
```

### Count Tokens Config

```java { .api }
package com.google.genai.types;

public final class CountTokensConfig {
  public static Builder builder();

  public Optional<GenerateContentConfig> generateContentConfig();
  public Optional<HttpOptions> httpOptions();
}
```

### Count Tokens Response

```java { .api }
package com.google.genai.types;

public final class CountTokensResponse {
  public Optional<Integer> totalTokens();
  public Optional<Integer> cachedContentTokenCount();
  public Optional<HttpResponse> sdkHttpResponse();
}
```

### Compute Tokens Config

```java { .api }
package com.google.genai.types;

public final class ComputeTokensConfig {
  public static Builder builder();

  public Optional<GenerateContentConfig> generateContentConfig();
  public Optional<HttpOptions> httpOptions();
}
```

### Compute Tokens Response

```java { .api }
package com.google.genai.types;

public final class ComputeTokensResponse {
  public Optional<List<ComputeTokensResult>> tokensInfo();
  public Optional<HttpResponse> sdkHttpResponse();
}
```

### Compute Tokens Result

```java { .api }
package com.google.genai.types;

public final class ComputeTokensResult {
  public Optional<List<Integer>> tokenIds();
  public Optional<List<String>> tokens();
  public Optional<String> role();
}
```

### Basic Token Counting

```java
import com.google.genai.types.CountTokensResponse;

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "What is your name?",
    null
);

System.out.println("Total tokens: " + response.totalTokens().orElse(0));
```

### Count Tokens with Content

```java
import com.google.genai.types.Content;
import com.google.genai.types.Part;
import com.google.common.collect.ImmutableList;

List<Content> contents = ImmutableList.of(
    Content.builder()
        .role("user")
        .parts(ImmutableList.of(Part.fromText("Hello, how are you?")))
        .build(),
    Content.builder()
        .role("model")
        .parts(ImmutableList.of(Part.fromText("I'm doing well, thank you!")))
        .build(),
    Content.builder()
        .role("user")
        .parts(ImmutableList.of(Part.fromText("Tell me about AI")))
        .build()
);

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    contents,
    null
);

System.out.println("Total tokens in conversation: " + response.totalTokens().orElse(0));
```

### Count Tokens with Generation Config

Use this to count tokens including system instructions and other config:

```java
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;

GenerateContentConfig genConfig = GenerateContentConfig.builder()
    .systemInstruction(Content.fromParts(
        Part.fromText("You are a helpful assistant.")
    ))
    .build();

CountTokensConfig config = CountTokensConfig.builder()
    .generateContentConfig(genConfig)
    .build();

CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "Tell me about AI",
    config
);

System.out.println("Total tokens (including system instruction): " +
    response.totalTokens().orElse(0));
```

### Compute Tokens (Vertex AI Only)

Compute tokens returns detailed token IDs and strings:

```java
import com.google.genai.types.ComputeTokensResponse;

Client client = Client.builder()
    .vertexAI(true)
    .project("your-project")
    .location("us-central1")
    .build();

ComputeTokensResponse response = client.models.computeTokens(
    "gemini-2.0-flash",
    "What is your name?",
    null
);

response.tokensInfo().ifPresent(tokensInfo -> {
  for (ComputeTokensResult result : tokensInfo) {
    System.out.println("Role: " + result.role().orElse("N/A"));

    result.tokenIds().ifPresent(ids -> {
      System.out.println("Token IDs: " + ids);
    });

    result.tokens().ifPresent(tokens -> {
      System.out.println("Tokens: " + tokens);
    });
  }
});
```

### Async Token Operations

```java
import java.util.concurrent.CompletableFuture;

CompletableFuture<CountTokensResponse> future = client.async.models.countTokens(
    "gemini-2.0-flash",
    "Count tokens for this text",
    null
);

future.thenAccept(response -> {
  System.out.println("Token count: " + response.totalTokens().orElse(0));
});
```

## Local Tokenizer (Experimental)

**NOTE:** Local tokenizer is experimental and only supports text-based tokenization (no multimodal).

Count tokens locally without making API calls, useful for quota management and cost estimation. LocalTokenizer provides free, offline token counting that doesn't consume API quota.

### LocalTokenizer Class

```java { .api }
package com.google.genai;

public final class LocalTokenizer {
  // Constructor
  public LocalTokenizer(String modelName);

  // Count tokens
  public CountTokensResult countTokens(List<Content> contents, CountTokensConfig config);
  public CountTokensResult countTokens(List<Content> contents);
  public CountTokensResult countTokens(Content content, CountTokensConfig config);
  public CountTokensResult countTokens(Content content);
  public CountTokensResult countTokens(String content, CountTokensConfig config);
  public CountTokensResult countTokens(String content);

  // Compute tokens (detailed)
  public ComputeTokensResult computeTokens(List<Content> contents);
  public ComputeTokensResult computeTokens(Content content);
  public ComputeTokensResult computeTokens(String content);
}
```

### Count Tokens Result

```java { .api }
package com.google.genai.types;

public final class CountTokensResult {
  public Optional<Integer> totalTokens();
}
```

### Compute Tokens Result

Note: this `ComputeTokensResult` (returned by `LocalTokenizer`) has a different shape from the `ComputeTokensResult` shown earlier for the API-based `computeTokens` — here the per-role detail lives in the nested `TokensInfo` entries.

```java { .api }
package com.google.genai.types;

public final class ComputeTokensResult {
  public Optional<Integer> totalTokens();
  public Optional<List<TokensInfo>> tokensInfo();
}
```

### Tokens Info

```java { .api }
package com.google.genai.types;

public final class TokensInfo {
  public Optional<String> role();
  public Optional<List<Integer>> tokenIds();
}
```

### Basic Local Token Counting

```java
import com.google.genai.LocalTokenizer;
import com.google.genai.types.CountTokensResult;

// Create local tokenizer
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

// Count tokens for simple text
String text = "This is a sample text to count tokens for.";
CountTokensResult result = tokenizer.countTokens(text);

int tokenCount = result.totalTokens().orElse(0);
System.out.println("Token count: " + tokenCount);

// No API call was made - completely free!
```

### Count Tokens for Content

```java
import com.google.genai.types.Content;
import com.google.genai.types.Part;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

Content content = Content.fromParts(
    Part.fromText("User message here")
);

CountTokensResult result = tokenizer.countTokens(content);
System.out.println("Tokens: " + result.totalTokens().orElse(0));
```

### Count Tokens for Conversation

```java
import java.util.List;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

List<Content> conversation = List.of(
    Content.builder()
        .role("user")
        .parts(List.of(Part.fromText("Hello!")))
        .build(),
    Content.builder()
        .role("model")
        .parts(List.of(Part.fromText("Hi there! How can I help you today?")))
        .build(),
    Content.builder()
        .role("user")
        .parts(List.of(Part.fromText("Tell me about AI.")))
        .build()
);

CountTokensResult result = tokenizer.countTokens(conversation);
System.out.println("Conversation tokens: " + result.totalTokens().orElse(0));
```

### Count Tokens with Config

```java
import com.google.genai.types.CountTokensConfig;
import com.google.genai.types.GenerateContentConfig;
import com.google.genai.types.Tool;

// Create config with tools
GenerateContentConfig genConfig = GenerateContentConfig.builder()
    .tools(tools)
    .build();

CountTokensConfig config = CountTokensConfig.builder()
    .generateContentConfig(genConfig)
    .build();

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
CountTokensResult result = tokenizer.countTokens("Text with tools", config);
```

### Compute Tokens (Detailed)

```java
import com.google.genai.types.ComputeTokensResult;
import com.google.genai.types.TokensInfo;

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

ComputeTokensResult result = tokenizer.computeTokens("Sample text");

result.totalTokens().ifPresent(total -> {
  System.out.println("Total tokens: " + total);
});

result.tokensInfo().ifPresent(tokensInfoList -> {
  for (TokensInfo info : tokensInfoList) {
    info.role().ifPresent(role -> {
      System.out.println("Role: " + role);
    });

    info.tokenIds().ifPresent(ids -> {
      System.out.println("Token IDs: " + ids);
    });
  }
});
```

### Compute Tokens for Conversation

```java
List<Content> conversation = List.of(
    Content.builder().role("user").parts(List.of(Part.fromText("Hi"))).build(),
    Content.builder().role("model").parts(List.of(Part.fromText("Hello!"))).build()
);

LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
ComputeTokensResult result = tokenizer.computeTokens(conversation);

result.tokensInfo().ifPresent(tokensInfoList -> {
  for (TokensInfo info : tokensInfoList) {
    String role = info.role().orElse("unknown");
    int tokenCount = info.tokenIds().map(List::size).orElse(0);
    System.out.println(role + ": " + tokenCount + " tokens");
  }
});
```

### Compare Local vs API Token Counting

```java
LocalTokenizer localTokenizer = new LocalTokenizer("gemini-2.0-flash");
String text = "Sample text for comparison";

// Local counting (free, instant)
CountTokensResult localResult = localTokenizer.countTokens(text);
int localCount = localResult.totalTokens().orElse(0);

// API counting (uses quota, requires network)
CountTokensResponse apiResult = client.models.countTokens(
    "gemini-2.0-flash",
    text,
    null
);
int apiCount = apiResult.totalTokens().orElse(0);

System.out.println("Local count: " + localCount);
System.out.println("API count: " + apiCount);
// Counts should be very close or identical
```

### Pre-check Before API Call

```java
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
String longPrompt = /* very long text */;

// Check locally first (free)
CountTokensResult result = tokenizer.countTokens(longPrompt);
int tokenCount = result.totalTokens().orElse(0);

if (tokenCount > 30000) {
  System.out.println("Prompt exceeds context window, truncating...");
  // Truncate before making API call
} else {
  // Safe to proceed with API call
  GenerateContentResponse response = client.models.generateContent(
      "gemini-2.0-flash",
      longPrompt,
      null
  );
}
```

### Batch Token Estimation

```java
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");

List<String> prompts = List.of(
    "Prompt 1",
    "Prompt 2",
    "Prompt 3"
);

int totalTokens = 0;
for (String prompt : prompts) {
  CountTokensResult result = tokenizer.countTokens(prompt);
  int tokens = result.totalTokens().orElse(0);
  totalTokens += tokens;
  System.out.println("Prompt: " + tokens + " tokens");
}

System.out.println("Total tokens for batch: " + totalTokens);
// Estimate cost before making batch API calls
```

### Limitations

**Text Only:** LocalTokenizer only supports text-based tokenization. It does not handle:
- Images
- Audio
- Video
- Other multimodal content

**Model Support:** Limited to models with available tokenizer models. Check documentation for supported models.

**Accuracy:** Token counts may differ slightly from API counts for edge cases, but should be very close for typical use.

```java
// This works
LocalTokenizer tokenizer = new LocalTokenizer("gemini-2.0-flash");
tokenizer.countTokens("Text content"); // ✓

// This is not supported
Content multimodal = Content.fromParts(
    Part.fromText("Text"),
    Part.fromImage(image) // LocalTokenizer cannot count image tokens
);
// tokenizer.countTokens(multimodal); // Will only count text part
```

## Token Management Best Practices

### Check Before Sending

```java
// Count tokens before making expensive API call
CountTokensResponse countResponse = client.models.countTokens(
    "gemini-2.0-flash",
    longPrompt,
    null
);

int tokenCount = countResponse.totalTokens().orElse(0);

if (tokenCount > 30000) {
  System.out.println("Prompt too long, truncating...");
  // Truncate or split prompt
} else {
  // Proceed with generation
  GenerateContentResponse response = client.models.generateContent(
      "gemini-2.0-flash",
      longPrompt,
      null
  );
}
```

### Budget Management

```java
import com.google.genai.types.GenerateContentConfig;

// Set max output tokens to control costs
GenerateContentConfig config = GenerateContentConfig.builder()
    .maxOutputTokens(500)
    .build();

GenerateContentResponse response = client.models.generateContent(
    "gemini-2.0-flash",
    "Write a summary",
    config
);

// Check actual usage
response.usageMetadata().ifPresent(usage -> {
  System.out.println("Prompt tokens: " + usage.promptTokenCount().orElse(0));
  System.out.println("Response tokens: " + usage.candidatesTokenCount().orElse(0));
  System.out.println("Total tokens: " + usage.totalTokenCount().orElse(0));
});
```

### Cached Content Token Savings

```java
// When using cached content, check token savings
CountTokensResponse response = client.models.countTokens(
    "gemini-2.0-flash",
    "Query against cached content",
    null
);

response.totalTokens().ifPresent(total -> {
  response.cachedContentTokenCount().ifPresent(cached -> {
    int uncachedTokens = total - cached;
    System.out.println("Total tokens: " + total);
    System.out.println("Cached tokens: " + cached);
    System.out.println("Uncached tokens: " + uncachedTokens);
    System.out.println("Token savings: " + (cached * 100.0 / total) + "%");
  });
});
```
879