pypi-anthropic

Description
The official Python library for the Anthropic API
Author
tessl
Last updated

How to use

npx @tessl/cli registry install tessl/pypi-anthropic@0.66.0

vertex.md docs/

1
# Google Vertex AI Integration
2
3
Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.
4
5
## Capabilities
6
7
### Vertex AI Client Classes
8
9
Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.
10
11
```python { .api }
12
class AnthropicVertex:
13
def __init__(
14
self,
15
*,
16
project_id: Optional[str] = None,
17
region: Optional[str] = None,
18
**kwargs
19
): ...
20
21
messages: Messages
22
completions: Completions
23
24
class AsyncAnthropicVertex:
25
def __init__(
26
self,
27
*,
28
project_id: Optional[str] = None,
29
region: Optional[str] = None,
30
**kwargs
31
): ...
32
33
messages: AsyncMessages
34
completions: AsyncCompletions
35
```
36
37
## Usage Examples
38
39
### Basic Vertex AI Setup
40
41
```python
42
from anthropic import AnthropicVertex
43
44
# Basic configuration with project ID and region
45
client = AnthropicVertex(
46
project_id="your-project-id",
47
region="us-central1"
48
)
49
50
# Using environment variables for configuration
51
import os
52
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
53
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"
54
55
client = AnthropicVertex()
56
```
57
58
### Google Cloud Authentication
59
60
```python
61
import os
62
from google.oauth2 import service_account
63
from anthropic import AnthropicVertex
64
65
# Method 1: Service Account Key File
66
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"
67
68
client = AnthropicVertex(
69
project_id="your-project-id",
70
region="us-central1"
71
)
72
73
# Method 2: Service Account from Code
74
credentials = service_account.Credentials.from_service_account_file(
75
"/path/to/service-account-key.json"
76
)
77
78
# Note: Direct credential passing may require additional configuration
79
client = AnthropicVertex(
80
project_id="your-project-id",
81
region="us-central1"
82
)
83
84
# Method 3: Default Application Credentials (recommended for GCP environments)
85
# This automatically uses credentials from:
86
# - Environment variable GOOGLE_APPLICATION_CREDENTIALS
87
# - gcloud CLI default credentials
88
# - Google Cloud metadata service (when running on GCP)
89
client = AnthropicVertex(
90
project_id="your-project-id",
91
region="us-central1"
92
)
93
```
94
95
### Messages with Vertex AI
96
97
```python
98
# Create message using Vertex AI
99
message = client.messages.create(
100
model="claude-sonnet-4-20250514", # Vertex AI model name
101
max_tokens=1024,
102
messages=[
103
{"role": "user", "content": "Hello from Google Vertex AI!"}
104
]
105
)
106
107
print(message.content[0].text)
108
```
109
110
### Vertex AI Model Selection
111
112
```python
113
# Available Claude models on Vertex AI (example model names)
114
VERTEX_MODELS = {
115
"claude-sonnet-4": "claude-sonnet-4-20250514",
116
"claude-haiku-3": "claude-haiku-3-20241022",
117
"claude-opus-3": "claude-opus-3-20240229"
118
}
119
120
def create_vertex_message(model_name: str, prompt: str) -> str:
121
"""Create message with Vertex AI model"""
122
123
if model_name not in VERTEX_MODELS:
124
raise ValueError(f"Unknown model: {model_name}")
125
126
model_id = VERTEX_MODELS[model_name]
127
128
message = client.messages.create(
129
model=model_id,
130
max_tokens=1024,
131
messages=[
132
{"role": "user", "content": prompt}
133
]
134
)
135
136
return message.content[0].text
137
138
# Usage
139
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
140
print(response)
141
```
142
143
### Multi-Region Vertex AI
144
145
```python
146
from typing import Any, List

from anthropic import AnthropicVertex

class VertexMultiRegion:
147
"""Manage Vertex AI clients across multiple Google Cloud regions"""
148
149
def __init__(self, project_id: str, regions: List[str]):
150
self.project_id = project_id
151
self.clients = {}
152
for region in regions:
153
self.clients[region] = AnthropicVertex(
154
project_id=project_id,
155
region=region
156
)
157
158
def create_message(self, region: str, **kwargs) -> Any:
159
"""Create message in specific region"""
160
if region not in self.clients:
161
raise ValueError(f"Region {region} not configured")
162
163
return self.clients[region].messages.create(**kwargs)
164
165
def find_best_region(self, model: str) -> str:
166
"""Find best region for a model (simplified example)"""
167
# In practice, you'd check model availability per region
168
region_preferences = {
169
"claude-opus": "us-central1", # Largest models in central region
170
"claude-sonnet": "us-west1", # Balanced models in west
171
"claude-haiku": "us-east1" # Fast models in east
172
}
173
174
for model_type, preferred_region in region_preferences.items():
175
if model_type in model.lower():
176
return preferred_region if preferred_region in self.clients else list(self.clients.keys())[0]
177
178
return list(self.clients.keys())[0] # Default to first region
179
180
# Usage
181
multi_region = VertexMultiRegion(
182
project_id="your-project-id",
183
regions=["us-central1", "us-west1", "us-east1"]
184
)
185
186
best_region = multi_region.find_best_region("claude-sonnet-4")
187
message = multi_region.create_message(
188
region=best_region,
189
model="claude-sonnet-4-20250514",
190
max_tokens=1024,
191
messages=[{"role": "user", "content": "Hello from multi-region!"}]
192
)
193
```
194
195
### Async Vertex AI Usage
196
197
```python
198
import asyncio
199
from anthropic import AsyncAnthropicVertex
200
201
async def vertex_async_example():
202
# Create async Vertex AI client
203
async_client = AsyncAnthropicVertex(
204
project_id="your-project-id",
205
region="us-central1"
206
)
207
208
# Async message creation
209
message = await async_client.messages.create(
210
model="claude-sonnet-4-20250514",
211
max_tokens=1024,
212
messages=[
213
{"role": "user", "content": "Async Vertex AI request"}
214
]
215
)
216
217
return message.content[0].text
218
219
# Run async
220
result = asyncio.run(vertex_async_example())
221
print(f"Async Vertex AI result: {result}")
222
```
223
224
### Vertex AI Error Handling
225
226
```python
227
import time
from typing import Optional

from google.api_core import exceptions as gcp_exceptions
228
from anthropic import AnthropicVertex, APIError
229
230
def robust_vertex_request(prompt: str, max_retries: int = 3) -> Optional[str]:
231
"""Make Vertex AI request with robust error handling"""
232
233
for attempt in range(max_retries):
234
try:
235
client = AnthropicVertex(
236
project_id="your-project-id",
237
region="us-central1"
238
)
239
240
message = client.messages.create(
241
model="claude-sonnet-4-20250514",
242
max_tokens=1024,
243
messages=[
244
{"role": "user", "content": prompt}
245
]
246
)
247
248
return message.content[0].text
249
250
except gcp_exceptions.Unauthenticated:
251
print("❌ Google Cloud authentication failed")
252
print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
253
return None
254
255
except gcp_exceptions.PermissionDenied:
256
print("❌ Permission denied for Vertex AI")
257
print("Check IAM permissions for Vertex AI access")
258
return None
259
260
except gcp_exceptions.ResourceExhausted:
261
print(f"⏳ Quota exceeded (attempt {attempt + 1})")
262
if attempt < max_retries - 1:
263
time.sleep(2 ** attempt)
264
continue
265
print("❌ Quota exceeded. Max retries reached.")
266
return None
267
268
except gcp_exceptions.DeadlineExceeded:
269
print(f"⏰ Request timeout (attempt {attempt + 1})")
270
if attempt < max_retries - 1:
271
continue
272
print("❌ Request timeout. Max retries reached.")
273
return None
274
275
except gcp_exceptions.ServiceUnavailable:
276
print(f"🔥 Service unavailable (attempt {attempt + 1})")
277
if attempt < max_retries - 1:
278
time.sleep(5)
279
continue
280
print("❌ Service unavailable. Max retries reached.")
281
return None
282
283
except gcp_exceptions.NotFound:
284
print("❌ Model or resource not found")
285
print("Check model name and project configuration")
286
return None
287
288
except APIError as e:
289
print(f"❌ Anthropic API error: {e}")
290
return None
291
292
except Exception as e:
293
print(f"❌ Unexpected error: {e}")
294
return None
295
296
print("❌ Max retries reached")
297
return None
298
299
# Usage
300
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
301
if result:
302
print(f"Success: {result}")
303
```
304
305
### Vertex AI Configuration Management
306
307
```python
308
import json
309
from google.cloud import aiplatform
310
from typing import Any, Dict

from anthropic import AnthropicVertex
311
312
class VertexConfig:
313
"""Configuration management for Vertex AI deployment"""
314
315
def __init__(self, config_file: str = "vertex-config.json"):
316
self.config_file = config_file
317
self.config = self.load_config()
318
319
def load_config(self) -> Dict[str, Any]:
320
"""Load configuration from file"""
321
try:
322
with open(self.config_file, 'r') as f:
323
return json.load(f)
324
except FileNotFoundError:
325
return self.default_config()
326
327
def default_config(self) -> Dict[str, Any]:
328
"""Default configuration"""
329
return {
330
"project_id": "your-project-id",
331
"regions": ["us-central1", "us-west1"],
332
"models": {
333
"fast": "claude-haiku-3-20241022",
334
"balanced": "claude-sonnet-4-20250514",
335
"powerful": "claude-opus-3-20240229"
336
},
337
"max_tokens": 1024,
338
"timeout": 30,
339
"max_retries": 3
340
}
341
342
def create_client(self, region: str = None) -> AnthropicVertex:
343
"""Create configured Vertex AI client"""
344
region = region or self.config["regions"][0]
345
346
return AnthropicVertex(
347
project_id=self.config["project_id"],
348
region=region,
349
timeout=self.config["timeout"],
350
max_retries=self.config["max_retries"]
351
)
352
353
def create_message(self, prompt: str, model_type: str = "balanced") -> str:
354
"""Create message with configured defaults"""
355
client = self.create_client()
356
model = self.config["models"].get(model_type, self.config["models"]["balanced"])
357
358
message = client.messages.create(
359
model=model,
360
max_tokens=self.config["max_tokens"],
361
messages=[
362
{"role": "user", "content": prompt}
363
]
364
)
365
366
return message.content[0].text
367
368
# Usage
369
config = VertexConfig()
370
371
# Quick message with defaults
372
response = config.create_message("Explain machine learning", model_type="fast")
373
print(response)
374
375
# Create client for custom usage
376
client = config.create_client(region="us-west1")
377
```
378
379
### Vertex AI with Service Account Impersonation
380
381
```python
382
from google.oauth2 import service_account
383
from google.auth import impersonated_credentials
384
from anthropic import AnthropicVertex
385
386
def create_impersonated_vertex_client(
387
source_credentials_file: str,
388
target_service_account: str,
389
project_id: str,
390
region: str
391
) -> AnthropicVertex:
392
"""Create Vertex AI client with service account impersonation"""
393
394
# Load source credentials
395
source_credentials = service_account.Credentials.from_service_account_file(
396
source_credentials_file
397
)
398
399
# Create impersonated credentials
400
target_credentials = impersonated_credentials.Credentials(
401
source_credentials=source_credentials,
402
target_principal=target_service_account,
403
target_scopes=["https://www.googleapis.com/auth/cloud-platform"]
404
)
405
406
# Note: Direct credential passing may require additional setup
407
# This is a conceptual example
408
return AnthropicVertex(
409
project_id=project_id,
410
region=region
411
)
412
413
# Usage
414
impersonated_client = create_impersonated_vertex_client(
415
source_credentials_file="/path/to/source-credentials.json",
416
target_service_account="target-sa@project.iam.gserviceaccount.com",
417
project_id="your-project-id",
418
region="us-central1"
419
)
420
```
421
422
### Vertex AI Cost Monitoring
423
424
```python
425
import time
426
from typing import Any, Dict, List

from anthropic import AnthropicVertex
427
from datetime import datetime
428
429
class VertexCostMonitor:
430
"""Monitor and optimize Vertex AI usage costs"""
431
432
def __init__(self, project_id: str):
433
self.project_id = project_id
434
self.usage_stats = {}
435
self.model_costs = {
436
# Example costs per 1K tokens (input/output)
437
"claude-haiku-3-20241022": (0.00025, 0.00125),
438
"claude-sonnet-4-20250514": (0.003, 0.015),
439
"claude-opus-3-20240229": (0.015, 0.075)
440
}
441
442
def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
443
"""Select model based on cost-effectiveness"""
444
445
if quality_requirement == "minimal" or len(prompt) < 500:
446
return "claude-haiku-3-20241022"
447
elif quality_requirement == "maximum":
448
return "claude-opus-3-20240229"
449
else:
450
return "claude-sonnet-4-20250514"
451
452
def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
453
"""Estimate cost for request"""
454
if model not in self.model_costs:
455
return 0.0
456
457
input_cost, output_cost = self.model_costs[model]
458
return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)
459
460
def create_monitored_message(self, client: AnthropicVertex, prompt: str, **kwargs) -> tuple:
461
"""Create message with cost monitoring"""
462
463
# Select cost-effective model
464
model = self.select_cost_effective_model(
465
prompt,
466
kwargs.get("quality_requirement", "balanced")
467
)
468
469
# Create message
470
start_time = time.time()
471
message = client.messages.create(
472
model=model,
473
messages=[{"role": "user", "content": prompt}],
474
**{k: v for k, v in kwargs.items() if k != "quality_requirement"}
475
)
476
duration = time.time() - start_time
477
478
# Track usage
479
usage = message.usage
480
cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)
481
482
# Update statistics
483
if model not in self.usage_stats:
484
self.usage_stats[model] = {
485
"requests": 0,
486
"total_cost": 0.0,
487
"total_tokens": 0,
488
"avg_duration": 0.0
489
}
490
491
stats = self.usage_stats[model]
492
stats["requests"] += 1
493
stats["total_cost"] += cost
494
stats["total_tokens"] += usage.input_tokens + usage.output_tokens
495
stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]
496
497
return message, {
498
"model": model,
499
"cost": cost,
500
"duration": duration,
501
"tokens": usage.input_tokens + usage.output_tokens,
502
"cost_per_token": cost / (usage.input_tokens + usage.output_tokens) if usage.input_tokens + usage.output_tokens > 0 else 0
503
}
504
505
def get_cost_summary(self) -> Dict[str, Any]:
506
"""Get cost usage summary"""
507
total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
508
total_requests = sum(stats["requests"] for stats in self.usage_stats.values())
509
510
return {
511
"total_cost": total_cost,
512
"total_requests": total_requests,
513
"avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
514
"model_breakdown": self.usage_stats
515
}
516
517
# Usage
518
monitor = VertexCostMonitor("your-project-id")
519
client = AnthropicVertex(
520
project_id="your-project-id",
521
region="us-central1"
522
)
523
524
message, stats = monitor.create_monitored_message(
525
client,
526
"Explain the benefits of cloud computing in detail",
527
max_tokens=500,
528
quality_requirement="balanced"
529
)
530
531
print(f"Model: {stats['model']}")
532
print(f"Cost: ${stats['cost']:.6f}")
533
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
534
print(f"Duration: {stats['duration']:.2f}s")
535
print(f"Response: {message.content[0].text[:100]}...")
536
537
# Get overall cost summary
538
summary = monitor.get_cost_summary()
539
print(f"\nCost Summary:")
540
print(f"Total cost: ${summary['total_cost']:.6f}")
541
print(f"Total requests: {summary['total_requests']}")
542
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")
543
```
544
545
### Vertex AI with Streaming
546
547
```python
548
# Streaming with Vertex AI
549
with client.messages.stream(
550
model="claude-sonnet-4-20250514",
551
max_tokens=1024,
552
messages=[
553
{"role": "user", "content": "Write a story about AI on Google Cloud"}
554
]
555
) as stream:
556
for text in stream.text_stream:
557
print(text, end="", flush=True)
558
```
559
560
### Vertex AI Deployment Patterns
561
562
```python
563
from typing import Dict, Optional

from anthropic import AnthropicVertex

class VertexDeployment:
564
"""Production deployment patterns for Vertex AI"""
565
566
def __init__(self, project_id: str, environment: str = "production"):
567
self.project_id = project_id
568
self.environment = environment
569
self.clients = self._create_clients()
570
571
def _create_clients(self) -> Dict[str, AnthropicVertex]:
572
"""Create clients for different regions/purposes"""
573
574
configs = {
575
"primary": {
576
"region": "us-central1",
577
"timeout": 60,
578
"max_retries": 5
579
},
580
"fallback": {
581
"region": "us-west1",
582
"timeout": 45,
583
"max_retries": 3
584
},
585
"dev": {
586
"region": "us-east1",
587
"timeout": 30,
588
"max_retries": 2
589
}
590
}
591
592
clients = {}
593
for name, config in configs.items():
594
if self.environment == "development" and name != "dev":
595
continue
596
597
clients[name] = AnthropicVertex(
598
project_id=self.project_id,
599
region=config["region"],
600
timeout=config["timeout"],
601
max_retries=config["max_retries"]
602
)
603
604
return clients
605
606
def create_message_with_fallback(self, prompt: str, **kwargs) -> Optional[str]:
607
"""Create message with automatic fallback"""
608
609
client_order = ["primary", "fallback"] if self.environment == "production" else ["dev"]
610
611
for client_name in client_order:
612
if client_name not in self.clients:
613
continue
614
615
try:
616
print(f"Trying {client_name} client...")
617
message = self.clients[client_name].messages.create(
618
messages=[{"role": "user", "content": prompt}],
619
**kwargs
620
)
621
print(f"✅ Success with {client_name} client")
622
return message.content[0].text
623
624
except Exception as e:
625
print(f"❌ {client_name} client failed: {e}")
626
continue
627
628
print("❌ All clients failed")
629
return None
630
631
# Usage
632
deployment = VertexDeployment("your-project-id", "production")
633
634
response = deployment.create_message_with_fallback(
635
"Explain quantum computing",
636
model="claude-sonnet-4-20250514",
637
max_tokens=1024
638
)
639
640
if response:
641
print(f"Response: {response}")
642
```