- Spec files
pypi-anthropic
Describes: pkg:pypi/anthropic@0.66.x
- Description
- The official Python library for the Anthropic API
- Author
- tessl
- Last updated
vertex.md docs/
# Google Vertex AI Integration

Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.

## Capabilities

### Vertex AI Client Classes

Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.

```python { .api }
class AnthropicVertex:
    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    messages: Messages
    completions: Completions

class AsyncAnthropicVertex:
    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    messages: AsyncMessages
    completions: AsyncCompletions
```

## Usage Examples

### Basic Vertex AI Setup

```python
from anthropic import AnthropicVertex

# Basic configuration with project ID and region
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Using environment variables for configuration
import os
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"

client = AnthropicVertex()
```

### Google Cloud Authentication

```python
import os
from google.oauth2 import service_account
from anthropic import AnthropicVertex

# Method 1: Service Account Key File
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"

client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 2: Service Account from Code
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json"
)

# Note: Direct credential passing may require additional configuration
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 3: Default Application Credentials (recommended for GCP environments)
# This automatically uses credentials from:
# - Environment variable GOOGLE_APPLICATION_CREDENTIALS
# - gcloud CLI default credentials
# - Google Cloud metadata service (when running on GCP)
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)
```

### Messages with Vertex AI

```python
# Create message using Vertex AI
message = client.messages.create(
    model="claude-sonnet-4-20250514",  # Vertex AI model name
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello from Google Vertex AI!"}
    ]
)

print(message.content[0].text)
```

### Vertex AI Model Selection

```python
# Available Claude models on Vertex AI (example model names)
VERTEX_MODELS = {
    "claude-sonnet-4": "claude-sonnet-4-20250514",
    "claude-haiku-3": "claude-haiku-3-20241022",
    "claude-opus-3": "claude-opus-3-20240229"
}

def create_vertex_message(model_name: str, prompt: str) -> str:
    """Create message with Vertex AI model"""

    if model_name not in VERTEX_MODELS:
        raise ValueError(f"Unknown model: {model_name}")

    model_id = VERTEX_MODELS[model_name]

    message = client.messages.create(
        model=model_id,
        max_tokens=1024,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return message.content[0].text

# Usage
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
print(response)
```

### Multi-Region Vertex AI

```python
class VertexMultiRegion:
    """Manage Vertex AI clients across multiple Google Cloud regions"""

    def __init__(self, project_id: str, regions: List[str]):
        self.project_id = project_id
        self.clients = {}
        for region in regions:
            self.clients[region] = AnthropicVertex(
                project_id=project_id,
                region=region
            )

    def create_message(self, region: str, **kwargs) -> Any:
        """Create message in specific region"""
        if region not in self.clients:
            raise ValueError(f"Region {region} not configured")

        return self.clients[region].messages.create(**kwargs)

    def find_best_region(self, model: str) -> str:
        """Find best region for a model (simplified example)"""
        # In practice, you'd check model availability per region
        region_preferences = {
            "claude-opus": "us-central1",  # Largest models in central region
            "claude-sonnet": "us-west1",  # Balanced models in west
            "claude-haiku": "us-east1"  # Fast models in east
        }

        for model_type, preferred_region in region_preferences.items():
            if model_type in model.lower():
                return preferred_region if preferred_region in self.clients else list(self.clients.keys())[0]

        return list(self.clients.keys())[0]  # Default to first region

# Usage
multi_region = VertexMultiRegion(
    project_id="your-project-id",
    regions=["us-central1", "us-west1", "us-east1"]
)

best_region = multi_region.find_best_region("claude-sonnet-4")
message = multi_region.create_message(
    region=best_region,
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from multi-region!"}]
)
```

### Async Vertex AI Usage

```python
import asyncio
from anthropic import AsyncAnthropicVertex

async def vertex_async_example():
    # Create async Vertex AI client
    async_client = AsyncAnthropicVertex(
        project_id="your-project-id",
        region="us-central1"
    )

    # Async message creation
    message = await async_client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[
            {"role": "user", "content": "Async Vertex AI request"}
        ]
    )

    return message.content[0].text

# Run async
result = asyncio.run(vertex_async_example())
print(f"Async Vertex AI result: {result}")
```

### Vertex AI Error Handling

```python
import time
from typing import Optional

from google.api_core import exceptions as gcp_exceptions
from anthropic import AnthropicVertex, APIError

def robust_vertex_request(prompt: str, max_retries: int = 3) -> Optional[str]:
    """Make Vertex AI request with robust error handling"""

    for attempt in range(max_retries):
        try:
            client = AnthropicVertex(
                project_id="your-project-id",
                region="us-central1"
            )

            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return message.content[0].text

        except gcp_exceptions.Unauthenticated:
            print("❌ Google Cloud authentication failed")
            print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
            return None

        except gcp_exceptions.PermissionDenied:
            print("❌ Permission denied for Vertex AI")
            print("Check IAM permissions for Vertex AI access")
            return None

        except gcp_exceptions.ResourceExhausted:
            print(f"⏳ Quota exceeded (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            print("❌ Quota exceeded. Max retries reached.")
            return None

        except gcp_exceptions.DeadlineExceeded:
            print(f"⏰ Request timeout (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                continue
            print("❌ Request timeout. Max retries reached.")
            return None

        except gcp_exceptions.ServiceUnavailable:
            print(f"🔥 Service unavailable (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(5)
                continue
            print("❌ Service unavailable. Max retries reached.")
            return None

        except gcp_exceptions.NotFound:
            print("❌ Model or resource not found")
            print("Check model name and project configuration")
            return None

        except APIError as e:
            print(f"❌ Anthropic API error: {e}")
            return None

        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return None

    print("❌ Max retries reached")
    return None

# Usage
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
if result:
    print(f"Success: {result}")
```

### Vertex AI Configuration Management

```python
import json
from google.cloud import aiplatform
from typing import Dict, Any

class VertexConfig:
    """Configuration management for Vertex AI deployment"""

    def __init__(self, config_file: str = "vertex-config.json"):
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from file"""
        try:
            with open(self.config_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return self.default_config()

    def default_config(self) -> Dict[str, Any]:
        """Default configuration"""
        return {
            "project_id": "your-project-id",
            "regions": ["us-central1", "us-west1"],
            "models": {
                "fast": "claude-haiku-3-20241022",
                "balanced": "claude-sonnet-4-20250514",
                "powerful": "claude-opus-3-20240229"
            },
            "max_tokens": 1024,
            "timeout": 30,
            "max_retries": 3
        }

    def create_client(self, region: str = None) -> AnthropicVertex:
        """Create configured Vertex AI client"""
        region = region or self.config["regions"][0]

        return AnthropicVertex(
            project_id=self.config["project_id"],
            region=region,
            timeout=self.config["timeout"],
            max_retries=self.config["max_retries"]
        )

    def create_message(self, prompt: str, model_type: str = "balanced") -> str:
        """Create message with configured defaults"""
        client = self.create_client()
        model = self.config["models"].get(model_type, self.config["models"]["balanced"])

        message = client.messages.create(
            model=model,
            max_tokens=self.config["max_tokens"],
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        return message.content[0].text

# Usage
config = VertexConfig()

# Quick message with defaults
response = config.create_message("Explain machine learning", model_type="fast")
print(response)

# Create client for custom usage
client = config.create_client(region="us-west1")
```

### Vertex AI with Service Account Impersonation

```python
from google.oauth2 import service_account
from google.auth import impersonated_credentials
from anthropic import AnthropicVertex

def create_impersonated_vertex_client(
    source_credentials_file: str,
    target_service_account: str,
    project_id: str,
    region: str
) -> AnthropicVertex:
    """Create Vertex AI client with service account impersonation"""

    # Load source credentials
    source_credentials = service_account.Credentials.from_service_account_file(
        source_credentials_file
    )

    # Create impersonated credentials
    target_credentials = impersonated_credentials.Credentials(
        source_credentials=source_credentials,
        target_principal=target_service_account,
        target_scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Note: Direct credential passing may require additional setup
    # This is a conceptual example
    return AnthropicVertex(
        project_id=project_id,
        region=region
    )

# Usage
impersonated_client = create_impersonated_vertex_client(
    source_credentials_file="/path/to/source-credentials.json",
    target_service_account="target-sa@project.iam.gserviceaccount.com",
    project_id="your-project-id",
    region="us-central1"
)
```

### Vertex AI Cost Monitoring

```python
import time
from typing import Any, Dict, List
from datetime import datetime

class VertexCostMonitor:
    """Monitor and optimize Vertex AI usage costs"""

    def __init__(self, project_id: str):
        self.project_id = project_id
        self.usage_stats = {}
        self.model_costs = {
            # Example costs per 1K tokens (input/output)
            "claude-haiku-3-20241022": (0.00025, 0.00125),
            "claude-sonnet-4-20250514": (0.003, 0.015),
            "claude-opus-3-20240229": (0.015, 0.075)
        }

    def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
        """Select model based on cost-effectiveness"""

        if quality_requirement == "minimal" or len(prompt) < 500:
            return "claude-haiku-3-20241022"
        elif quality_requirement == "maximum":
            return "claude-opus-3-20240229"
        else:
            return "claude-sonnet-4-20250514"

    def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost for request"""
        if model not in self.model_costs:
            return 0.0

        input_cost, output_cost = self.model_costs[model]
        return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)

    def create_monitored_message(self, client: AnthropicVertex, prompt: str, **kwargs) -> tuple:
        """Create message with cost monitoring"""

        # Select cost-effective model
        model = self.select_cost_effective_model(
            prompt,
            kwargs.get("quality_requirement", "balanced")
        )

        # Create message
        start_time = time.time()
        message = client.messages.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **{k: v for k, v in kwargs.items() if k != "quality_requirement"}
        )
        duration = time.time() - start_time

        # Track usage
        usage = message.usage
        cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)

        # Update statistics
        if model not in self.usage_stats:
            self.usage_stats[model] = {
                "requests": 0,
                "total_cost": 0.0,
                "total_tokens": 0,
                "avg_duration": 0.0
            }

        stats = self.usage_stats[model]
        stats["requests"] += 1
        stats["total_cost"] += cost
        stats["total_tokens"] += usage.input_tokens + usage.output_tokens
        stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]

        return message, {
            "model": model,
            "cost": cost,
            "duration": duration,
            "tokens": usage.input_tokens + usage.output_tokens,
            "cost_per_token": cost / (usage.input_tokens + usage.output_tokens) if usage.input_tokens + usage.output_tokens > 0 else 0
        }

    def get_cost_summary(self) -> Dict[str, Any]:
        """Get cost usage summary"""
        total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
        total_requests = sum(stats["requests"] for stats in self.usage_stats.values())

        return {
            "total_cost": total_cost,
            "total_requests": total_requests,
            "avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
            "model_breakdown": self.usage_stats
        }

# Usage
monitor = VertexCostMonitor("your-project-id")
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

message, stats = monitor.create_monitored_message(
    client,
    "Explain the benefits of cloud computing in detail",
    max_tokens=500,
    quality_requirement="balanced"
)

print(f"Model: {stats['model']}")
print(f"Cost: ${stats['cost']:.6f}")
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
print(f"Duration: {stats['duration']:.2f}s")
print(f"Response: {message.content[0].text[:100]}...")

# Get overall cost summary
summary = monitor.get_cost_summary()
print(f"\nCost Summary:")
print(f"Total cost: ${summary['total_cost']:.6f}")
print(f"Total requests: {summary['total_requests']}")
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")
```

### Vertex AI with Streaming

```python
# Streaming with Vertex AI
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story about AI on Google Cloud"}
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
```

### Vertex AI Deployment Patterns

```python
class VertexDeployment:
    """Production deployment patterns for Vertex AI"""

    def __init__(self, project_id: str, environment: str = "production"):
        self.project_id = project_id
        self.environment = environment
        self.clients = self._create_clients()

    def _create_clients(self) -> Dict[str, AnthropicVertex]:
        """Create clients for different regions/purposes"""

        configs = {
            "primary": {
                "region": "us-central1",
                "timeout": 60,
                "max_retries": 5
            },
            "fallback": {
                "region": "us-west1",
                "timeout": 45,
                "max_retries": 3
            },
            "dev": {
                "region": "us-east1",
                "timeout": 30,
                "max_retries": 2
            }
        }

        clients = {}
        for name, config in configs.items():
            if self.environment == "development" and name != "dev":
                continue

            clients[name] = AnthropicVertex(
                project_id=self.project_id,
                region=config["region"],
                timeout=config["timeout"],
                max_retries=config["max_retries"]
            )

        return clients

    def create_message_with_fallback(self, prompt: str, **kwargs) -> Optional[str]:
        """Create message with automatic fallback"""

        client_order = ["primary", "fallback"] if self.environment == "production" else ["dev"]

        for client_name in client_order:
            if client_name not in self.clients:
                continue

            try:
                print(f"Trying {client_name} client...")
                message = self.clients[client_name].messages.create(
                    messages=[{"role": "user", "content": prompt}],
                    **kwargs
                )
                print(f"✅ Success with {client_name} client")
                return message.content[0].text

            except Exception as e:
                print(f"❌ {client_name} client failed: {e}")
                continue

        print("❌ All clients failed")
        return None

# Usage
deployment = VertexDeployment("your-project-id", "production")

response = deployment.create_message_with_fallback(
    "Explain quantum computing",
    model="claude-sonnet-4-20250514",
    max_tokens=1024
)

if response:
    print(f"Response: {response}")
```