- Spec files
pypi-anthropic
Describes: pkg:pypi/anthropic@0.66.x
- Description
- The official Python library for the Anthropic API
- Author
- tessl
- Last updated
vertex.md docs/
# Google Vertex AI Integration

Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.

## Capabilities

### Vertex AI Client Classes

Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.

```python { .api }
class AnthropicVertex:
    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    messages: Messages
    completions: Completions

class AsyncAnthropicVertex:
    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    messages: AsyncMessages
    completions: AsyncCompletions
```

## Usage Examples

### Basic Vertex AI Setup

```python
from anthropic import AnthropicVertex

# Basic configuration with project ID and region
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Using environment variables for configuration
import os
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"

client = AnthropicVertex()
```

### Google Cloud Authentication

```python
import os
from google.oauth2 import service_account
from anthropic import AnthropicVertex

# Method 1: Service Account Key File
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"

client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 2: Service Account from Code
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json"
)

# Note: Direct credential passing may require additional configuration
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 3: Default Application Credentials (recommended for GCP environments)
# This automatically uses credentials from:
# - Environment variable GOOGLE_APPLICATION_CREDENTIALS
# - gcloud CLI default credentials
# - Google Cloud metadata service (when running on GCP)
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)
```

### Messages with Vertex AI

```python
# Create message using Vertex AI
message = client.messages.create(
    model="claude-sonnet-4-20250514",  # Vertex AI model name
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello from Google Vertex AI!"}
    ]
)

print(message.content[0].text)
```

### Vertex AI Model Selection

```python
# Available Claude models on Vertex AI (example model names)
VERTEX_MODELS = {
    "claude-sonnet-4": "claude-sonnet-4-20250514",
    "claude-haiku-3": "claude-haiku-3-20241022",
    "claude-opus-3": "claude-opus-3-20240229"
}

def create_vertex_message(model_name: str, prompt: str) -> str:
    """Create message with Vertex AI model"""

    if model_name not in VERTEX_MODELS:
        raise ValueError(f"Unknown model: {model_name}")

    model_id = VERTEX_MODELS[model_name]

    message = client.messages.create(
        model=model_id,
        max_tokens=1024,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )

    return message.content[0].text

# Usage
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
print(response)
```

### Multi-Region Vertex AI

```python
class VertexMultiRegion:
    """Manage Vertex AI clients across multiple Google Cloud regions"""

    def __init__(self, project_id: str, regions: List[str]):
        self.project_id = project_id
        self.clients = {}
        for region in regions:
            self.clients[region] = AnthropicVertex(
                project_id=project_id,
                region=region
            )

    def create_message(self, region: str, **kwargs) -> Any:
        """Create message in specific region"""
        if region not in self.clients:
            raise ValueError(f"Region {region} not configured")

        return self.clients[region].messages.create(**kwargs)

    def find_best_region(self, model: str) -> str:
        """Find best region for a model (simplified example)"""
        # In practice, you'd check model availability per region
        region_preferences = {
            "claude-opus": "us-central1",  # Largest models in central region
            "claude-sonnet": "us-west1",  # Balanced models in west
            "claude-haiku": "us-east1"  # Fast models in east
        }

        for model_type, preferred_region in region_preferences.items():
            if model_type in model.lower():
                return preferred_region if preferred_region in self.clients else list(self.clients.keys())[0]

        return list(self.clients.keys())[0]  # Default to first region

# Usage
multi_region = VertexMultiRegion(
    project_id="your-project-id",
    regions=["us-central1", "us-west1", "us-east1"]
)

best_region = multi_region.find_best_region("claude-sonnet-4")
message = multi_region.create_message(
    region=best_region,
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from multi-region!"}]
)
```

### Async Vertex AI Usage

```python
import asyncio
from anthropic import AsyncAnthropicVertex

async def vertex_async_example():
    # Create async Vertex AI client
    async_client = AsyncAnthropicVertex(
        project_id="your-project-id",
        region="us-central1"
    )

    # Async message creation
    message = await async_client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[
            {"role": "user", "content": "Async Vertex AI request"}
        ]
    )

    return message.content[0].text

# Run async
result = asyncio.run(vertex_async_example())
print(f"Async Vertex AI result: {result}")
```

### Vertex AI Error Handling

```python
import time
from typing import Optional

from google.api_core import exceptions as gcp_exceptions
from anthropic import AnthropicVertex, APIError

def robust_vertex_request(prompt: str, max_retries: int = 3) -> Optional[str]:
    """Make Vertex AI request with robust error handling"""

    for attempt in range(max_retries):
        try:
            client = AnthropicVertex(
                project_id="your-project-id",
                region="us-central1"
            )

            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return message.content[0].text

        except gcp_exceptions.Unauthenticated:
            print("❌ Google Cloud authentication failed")
            print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
            return None

        except gcp_exceptions.PermissionDenied:
            print("❌ Permission denied for Vertex AI")
            print("Check IAM permissions for Vertex AI access")
            return None

        except gcp_exceptions.ResourceExhausted:
            print(f"⏳ Quota exceeded (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            print("❌ Quota exceeded. Max retries reached.")
            return None

        except gcp_exceptions.DeadlineExceeded:
            print(f"⏰ Request timeout (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                continue
            print("❌ Request timeout. Max retries reached.")
            return None

        except gcp_exceptions.ServiceUnavailable:
            print(f"🔥 Service unavailable (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(5)
                continue
            print("❌ Service unavailable. Max retries reached.")
            return None

        except gcp_exceptions.NotFound:
            print("❌ Model or resource not found")
            print("Check model name and project configuration")
            return None

        except APIError as e:
            print(f"❌ Anthropic API error: {e}")
            return None

        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return None

    print("❌ Max retries reached")
    return None

# Usage
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
if result:
    print(f"Success: {result}")
```

### Vertex AI Configuration Management

```python
import json
from google.cloud import aiplatform
from typing import Dict, Any

class VertexConfig:
    """Configuration management for Vertex AI deployment"""

    def __init__(self, config_file: str = "vertex-config.json"):
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from file"""
        try:
            with open(self.config_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return self.default_config()

    def default_config(self) -> Dict[str, Any]:
        """Default configuration"""
        return {
            "project_id": "your-project-id",
            "regions": ["us-central1", "us-west1"],
            "models": {
                "fast": "claude-haiku-3-20241022",
                "balanced": "claude-sonnet-4-20250514",
                "powerful": "claude-opus-3-20240229"
            },
            "max_tokens": 1024,
            "timeout": 30,
            "max_retries": 3
        }

    def create_client(self, region: str = None) -> AnthropicVertex:
        """Create configured Vertex AI client"""
        region = region or self.config["regions"][0]

        return AnthropicVertex(
            project_id=self.config["project_id"],
            region=region,
            timeout=self.config["timeout"],
            max_retries=self.config["max_retries"]
        )

    def create_message(self, prompt: str, model_type: str = "balanced") -> str:
        """Create message with configured defaults"""
        client = self.create_client()
        model = self.config["models"].get(model_type, self.config["models"]["balanced"])

        message = client.messages.create(
            model=model,
            max_tokens=self.config["max_tokens"],
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        return message.content[0].text

# Usage
config = VertexConfig()

# Quick message with defaults
response = config.create_message("Explain machine learning", model_type="fast")
print(response)

# Create client for custom usage
client = config.create_client(region="us-west1")
```

### Vertex AI with Service Account Impersonation

```python
from google.oauth2 import service_account
from google.auth import impersonated_credentials
from anthropic import AnthropicVertex

def create_impersonated_vertex_client(
    source_credentials_file: str,
    target_service_account: str,
    project_id: str,
    region: str
) -> AnthropicVertex:
    """Create Vertex AI client with service account impersonation"""

    # Load source credentials
    source_credentials = service_account.Credentials.from_service_account_file(
        source_credentials_file
    )

    # Create impersonated credentials
    target_credentials = impersonated_credentials.Credentials(
        source_credentials=source_credentials,
        target_principal=target_service_account,
        target_scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Note: Direct credential passing may require additional setup
    # This is a conceptual example
    return AnthropicVertex(
        project_id=project_id,
        region=region
    )

# Usage
impersonated_client = create_impersonated_vertex_client(
    source_credentials_file="/path/to/source-credentials.json",
    target_service_account="target-sa@project.iam.gserviceaccount.com",
    project_id="your-project-id",
    region="us-central1"
)
```

### Vertex AI Cost Monitoring

```python
import time
from typing import Any, Dict, List
from datetime import datetime

class VertexCostMonitor:
    """Monitor and optimize Vertex AI usage costs"""

    def __init__(self, project_id: str):
        self.project_id = project_id
        self.usage_stats = {}
        self.model_costs = {
            # Example costs per 1K tokens (input/output)
            "claude-haiku-3-20241022": (0.00025, 0.00125),
            "claude-sonnet-4-20250514": (0.003, 0.015),
            "claude-opus-3-20240229": (0.015, 0.075)
        }

    def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
        """Select model based on cost-effectiveness"""

        if quality_requirement == "minimal" or len(prompt) < 500:
            return "claude-haiku-3-20241022"
        elif quality_requirement == "maximum":
            return "claude-opus-3-20240229"
        else:
            return "claude-sonnet-4-20250514"

    def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost for request"""
        if model not in self.model_costs:
            return 0.0

        input_cost, output_cost = self.model_costs[model]
        return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)

    def create_monitored_message(self, client: AnthropicVertex, prompt: str, **kwargs) -> tuple:
        """Create message with cost monitoring"""

        # Select cost-effective model
        model = self.select_cost_effective_model(
            prompt,
            kwargs.get("quality_requirement", "balanced")
        )

        # Create message
        start_time = time.time()
        message = client.messages.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **{k: v for k, v in kwargs.items() if k != "quality_requirement"}
        )
        duration = time.time() - start_time

        # Track usage
        usage = message.usage
        cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)

        # Update statistics
        if model not in self.usage_stats:
            self.usage_stats[model] = {
                "requests": 0,
                "total_cost": 0.0,
                "total_tokens": 0,
                "avg_duration": 0.0
            }

        stats = self.usage_stats[model]
        stats["requests"] += 1
        stats["total_cost"] += cost
        stats["total_tokens"] += usage.input_tokens + usage.output_tokens
        stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]

        return message, {
            "model": model,
            "cost": cost,
            "duration": duration,
            "tokens": usage.input_tokens + usage.output_tokens,
            "cost_per_token": cost / (usage.input_tokens + usage.output_tokens) if usage.input_tokens + usage.output_tokens > 0 else 0
        }

    def get_cost_summary(self) -> Dict[str, Any]:
        """Get cost usage summary"""
        total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
        total_requests = sum(stats["requests"] for stats in self.usage_stats.values())

        return {
            "total_cost": total_cost,
            "total_requests": total_requests,
            "avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
            "model_breakdown": self.usage_stats
        }

# Usage
monitor = VertexCostMonitor("your-project-id")
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

message, stats = monitor.create_monitored_message(
    client,
    "Explain the benefits of cloud computing in detail",
    max_tokens=500,
    quality_requirement="balanced"
)

print(f"Model: {stats['model']}")
print(f"Cost: ${stats['cost']:.6f}")
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
print(f"Duration: {stats['duration']:.2f}s")
print(f"Response: {message.content[0].text[:100]}...")

# Get overall cost summary
summary = monitor.get_cost_summary()
print(f"\nCost Summary:")
print(f"Total cost: ${summary['total_cost']:.6f}")
print(f"Total requests: {summary['total_requests']}")
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")
```

### Vertex AI with Streaming

```python
# Streaming with Vertex AI
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story about AI on Google Cloud"}
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)
```

### Vertex AI Deployment Patterns

```python
class VertexDeployment:
    """Production deployment patterns for Vertex AI"""

    def __init__(self, project_id: str, environment: str = "production"):
        self.project_id = project_id
        self.environment = environment
        self.clients = self._create_clients()

    def _create_clients(self) -> Dict[str, AnthropicVertex]:
        """Create clients for different regions/purposes"""

        configs = {
            "primary": {
                "region": "us-central1",
                "timeout": 60,
                "max_retries": 5
            },
            "fallback": {
                "region": "us-west1",
                "timeout": 45,
                "max_retries": 3
            },
            "dev": {
                "region": "us-east1",
                "timeout": 30,
                "max_retries": 2
            }
        }

        clients = {}
        for name, config in configs.items():
            if self.environment == "development" and name != "dev":
                continue

            clients[name] = AnthropicVertex(
                project_id=self.project_id,
                region=config["region"],
                timeout=config["timeout"],
                max_retries=config["max_retries"]
            )

        return clients

    def create_message_with_fallback(self, prompt: str, **kwargs) -> Optional[str]:
        """Create message with automatic fallback"""

        client_order = ["primary", "fallback"] if self.environment == "production" else ["dev"]

        for client_name in client_order:
            if client_name not in self.clients:
                continue

            try:
                print(f"Trying {client_name} client...")
                message = self.clients[client_name].messages.create(
                    messages=[{"role": "user", "content": prompt}],
                    **kwargs
                )
                print(f"✅ Success with {client_name} client")
                return message.content[0].text

            except Exception as e:
                print(f"❌ {client_name} client failed: {e}")
                continue

        print("❌ All clients failed")
        return None

# Usage
deployment = VertexDeployment("your-project-id", "production")

response = deployment.create_message_with_fallback(
    "Explain quantum computing",
    model="claude-sonnet-4-20250514",
    max_tokens=1024
)

if response:
    print(f"Response: {response}")
```