The official Python library for the Anthropic API
—
—
Does it follow best practices?
Impact
—
No eval scenarios have been run
—
The risk profile of this skill
Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.
Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.
class AnthropicVertex:
    """Synchronous client for reaching Claude models through Google Vertex AI.

    Only the public surface is outlined here; the constructor body is elided.
    """

    # Resource namespaces available on a client instance.
    # NOTE(review): confirm against the installed SDK that the Vertex client
    # exposes the legacy `completions` resource in addition to `messages`.
    messages: Messages
    completions: Completions

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ):
        """Configure the client; every argument is keyword-only.

        Args:
            project_id: Google Cloud project to use; optional.
            region: Google Cloud region to use; optional.
            **kwargs: Further client options. Other examples in this document
                pass `timeout` and `max_retries` this way.
        """
        ...
class AsyncAnthropicVertex:
    """Asynchronous client for reaching Claude models through Google Vertex AI.

    Only the public surface is outlined here; the constructor body is elided.
    """

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ):
        """Configure the client; every argument is keyword-only.

        Args:
            project_id: Google Cloud project to use; optional.
            region: Google Cloud region to use; optional.
            **kwargs: Further client options.
        """
        ...

    # Resource namespaces available on a client instance.
    # NOTE(review): confirm against the installed SDK that the Vertex client
    # exposes the legacy `completions` resource in addition to `messages`.
    messages: AsyncMessages
    completions: AsyncCompletions


# The import below opens the next example; it was fused onto the previous line.
from anthropic import AnthropicVertex
# Basic configuration with project ID and region
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1",
)

# Using environment variables for configuration
import os

# When the constructor arguments are omitted, the SDK's Vertex documentation
# names CLOUD_ML_REGION (region) and ANTHROPIC_VERTEX_PROJECT_ID (project) as
# the variables it consults. Without a region the constructor cannot build the
# regional endpoint, so the generic GOOGLE_CLOUD_* names are not sufficient.
os.environ["ANTHROPIC_VERTEX_PROJECT_ID"] = "your-project-id"
os.environ["CLOUD_ML_REGION"] = "us-central1"
client = AnthropicVertex()


# The import below opens the next example; it was fused onto the previous line.
import os
import time
from typing import Any, Dict, List, Optional

from anthropic import AnthropicVertex
from google.oauth2 import service_account
# Method 1: Service Account Key File
# Point Application Default Credentials at a key file; the client picks it up
# without any credential argument.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1",
)

# Method 2: Service Account from Code
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json",
    # Explicitly built service-account credentials carry no scopes by default;
    # Vertex AI requests need the cloud-platform scope.
    scopes=["https://www.googleapis.com/auth/cloud-platform"],
)
# Pass the credentials object to the client. Without this argument the object
# above is never used and the client falls back to default credentials.
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1",
    credentials=credentials,
)

# Method 3: Default Application Credentials (recommended for GCP environments)
# This automatically uses credentials from:
# - Environment variable GOOGLE_APPLICATION_CREDENTIALS
# - gcloud CLI default credentials
# - Google Cloud metadata service (when running on GCP)
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1",
)
# Create message using Vertex AI
# Send one user turn through the existing `client` and print the reply text.
message = client.messages.create(
    messages=[{"role": "user", "content": "Hello from Google Vertex AI!"}],
    max_tokens=1024,
    model="claude-sonnet-4-20250514",  # Vertex AI model name
)
# The reply text is read from the first content block of the response.
print(message.content[0].text)
# Available Claude models on Vertex AI (example model names)
# Short alias -> full model identifier sent in the request.
VERTEX_MODELS = {
    "claude-sonnet-4": "claude-sonnet-4-20250514",
    "claude-haiku-3": "claude-haiku-3-20241022",
    "claude-opus-3": "claude-opus-3-20240229",
}


def create_vertex_message(model_name: str, prompt: str) -> str:
    """Send `prompt` to the model registered under `model_name`.

    Uses the module-level `client` and a fixed budget of 1024 output tokens.

    Args:
        model_name: Alias that must be a key of `VERTEX_MODELS`.
        prompt: Text sent as a single user message.

    Returns:
        The text of the first content block of the reply.

    Raises:
        ValueError: If `model_name` is not a key of `VERTEX_MODELS`.
    """
    model_id = VERTEX_MODELS.get(model_name)
    if model_id is None:
        raise ValueError(f"Unknown model: {model_name}")

    conversation = [{"role": "user", "content": prompt}]
    reply = client.messages.create(
        model=model_id,
        max_tokens=1024,
        messages=conversation,
    )
    return reply.content[0].text
# Usage
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
print(response)


class VertexMultiRegion:
    """Manage Vertex AI clients across multiple Google Cloud regions.

    One `AnthropicVertex` client is created per configured region and kept in
    `self.clients`, keyed by region name.
    """

    def __init__(self, project_id: str, regions: List[str]):
        """Create one client per region.

        Args:
            project_id: Google Cloud project shared by every client.
            regions: Region names; the first one is the default fallback.

        Raises:
            ValueError: If `regions` is empty, because no fallback region
                would exist for `find_best_region`.
        """
        if not regions:
            raise ValueError("At least one region is required")
        self.project_id = project_id
        self.clients = {
            region: AnthropicVertex(project_id=project_id, region=region)
            for region in regions
        }

    def create_message(self, region: str, **kwargs) -> Any:
        """Create message in specific region.

        Args:
            region: A region passed to the constructor.
            **kwargs: Forwarded unchanged to `messages.create`.

        Raises:
            ValueError: If `region` was not configured.
        """
        if region not in self.clients:
            raise ValueError(f"Region {region} not configured")
        return self.clients[region].messages.create(**kwargs)

    def find_best_region(self, model: str) -> str:
        """Find best region for a model (simplified example).

        The model name is matched case-insensitively against a small table of
        model-family substrings. If the preferred region for the first matching
        family is configured it is returned; otherwise, and when no family
        matches, the first configured region is returned.
        """
        # In practice, you'd check model availability per region
        region_preferences = {
            "claude-opus": "us-central1",  # Largest models in central region
            "claude-sonnet": "us-west1",   # Balanced models in west
            "claude-haiku": "us-east1",    # Fast models in east
        }
        lowered = model.lower()
        # Dicts preserve insertion order, so this is the first region passed in.
        first_configured = next(iter(self.clients))
        for model_type, preferred_region in region_preferences.items():
            if model_type in lowered:
                if preferred_region in self.clients:
                    return preferred_region
                return first_configured
        return first_configured  # Default to first region
# Usage
multi_region = VertexMultiRegion(
    project_id="your-project-id",
    regions=["us-central1", "us-west1", "us-east1"],
)
# Pick a region for the model family, then send the request through it.
best_region = multi_region.find_best_region("claude-sonnet-4")
message = multi_region.create_message(
    region=best_region,
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from multi-region!"}],
)


# The import below opens the next example; it was fused onto the previous line.
import asyncio
from anthropic import AsyncAnthropicVertex
async def vertex_async_example():
    """Send one message with the async Vertex AI client and return its text.

    Returns:
        The text of the first content block of the reply.
    """
    # Create async Vertex AI client
    async_client = AsyncAnthropicVertex(
        project_id="your-project-id",
        region="us-central1",
    )
    # Async message creation
    message = await async_client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[
            {"role": "user", "content": "Async Vertex AI request"}
        ],
    )
    return message.content[0].text


# Run async
result = asyncio.run(vertex_async_example())
print(f"Async Vertex AI result: {result}")


# The import below opens the next example; it was fused onto the previous line.
from google.api_core import exceptions as gcp_exceptions
from anthropic import AnthropicVertex, APIError
def robust_vertex_request(prompt: str, max_retries: int = 3) -> Optional[str]:
    """Make Vertex AI request with robust error handling.

    The client talks to Vertex AI over HTTP and reports failures through the
    SDK's own exception classes (plus google-auth errors when credentials
    cannot be obtained), so those are the types handled here.

    Args:
        prompt: Text sent as a single user message.
        max_retries: Maximum number of attempts made by this function.

    Returns:
        The text of the first content block of the reply, or None when the
        request failed. Failures are reported with `print` and never raised.
    """
    # Function-scope imports keep the example self-contained.
    import time

    from anthropic import (
        APITimeoutError,
        AuthenticationError,
        InternalServerError,
        NotFoundError,
        PermissionDeniedError,
        RateLimitError,
    )
    from google.auth import exceptions as auth_exceptions

    # Build the client once and reuse it for every attempt.
    try:
        client = AnthropicVertex(
            project_id="your-project-id",
            region="us-central1",
        )
    except Exception as e:
        print(f"❌ Unexpected error: {e}")
        return None

    # NOTE(review): the SDK client has its own `max_retries` option (used in
    # other examples in this document); this loop retries on top of it.
    for attempt in range(max_retries):
        has_attempts_left = attempt < max_retries - 1
        try:
            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt}
                ],
            )
            return message.content[0].text
        except (AuthenticationError, auth_exceptions.GoogleAuthError):
            # Bad or missing credentials will not fix themselves: do not retry.
            print("❌ Google Cloud authentication failed")
            print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
            return None
        except PermissionDeniedError:
            print("❌ Permission denied for Vertex AI")
            print("Check IAM permissions for Vertex AI access")
            return None
        except NotFoundError:
            print("❌ Model or resource not found")
            print("Check model name and project configuration")
            return None
        except RateLimitError:
            print(f"⏳ Quota exceeded (attempt {attempt + 1})")
            if has_attempts_left:
                time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s, 4s, ...
                continue
            print("❌ Quota exceeded. Max retries reached.")
            return None
        except APITimeoutError:
            print(f"⏰ Request timeout (attempt {attempt + 1})")
            if has_attempts_left:
                continue
            print("❌ Request timeout. Max retries reached.")
            return None
        except InternalServerError:
            print(f"🔥 Service unavailable (attempt {attempt + 1})")
            if has_attempts_left:
                time.sleep(5)
                continue
            print("❌ Service unavailable. Max retries reached.")
            return None
        except APIError as e:
            # Any other SDK error; the specific subclasses are handled above.
            print(f"❌ Anthropic API error: {e}")
            return None
        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return None
    # Only reached when max_retries is zero or negative.
    print("❌ Max retries reached")
    return None
# Usage
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
# None (or an empty reply) means the request failed or produced no text.
if result:
    print(f"Success: {result}")


# The import below opens the next example; it was fused onto the previous line.
import json
from google.cloud import aiplatform
from typing import Dict, Any
class VertexConfig:
    """Configuration management for Vertex AI deployment.

    Settings are read from a JSON file and layered over built-in defaults, so
    a file only needs to contain the keys it wants to override.
    """

    def __init__(self, config_file: str = "vertex-config.json"):
        """Remember the file path and load the configuration immediately."""
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from file, falling back to defaults.

        Returns:
            The defaults when the file does not exist; otherwise the defaults
            overridden by the file's top-level keys. The `models` mapping is
            merged key by key so a file may override a single alias.

        Raises:
            ValueError: If the file does not contain a JSON object.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        defaults = self.default_config()
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                loaded = json.load(f)
        except FileNotFoundError:
            return defaults

        if not isinstance(loaded, dict):
            raise ValueError(f"{self.config_file} must contain a JSON object")

        merged = {**defaults, **loaded}
        if isinstance(loaded.get("models"), dict):
            # Keep default aliases that the file does not mention.
            merged["models"] = {**defaults["models"], **loaded["models"]}
        return merged

    def default_config(self) -> Dict[str, Any]:
        """Default configuration"""
        return {
            "project_id": "your-project-id",
            "regions": ["us-central1", "us-west1"],
            "models": {
                "fast": "claude-haiku-3-20241022",
                "balanced": "claude-sonnet-4-20250514",
                "powerful": "claude-opus-3-20240229",
            },
            "max_tokens": 1024,
            "timeout": 30,
            "max_retries": 3,
        }

    def create_client(self, region: Optional[str] = None) -> "AnthropicVertex":
        """Create configured Vertex AI client.

        Args:
            region: Region to use; defaults to the first configured region.
        """
        region = region or self.config["regions"][0]
        return AnthropicVertex(
            project_id=self.config["project_id"],
            region=region,
            timeout=self.config["timeout"],
            max_retries=self.config["max_retries"],
        )

    def create_message(self, prompt: str, model_type: str = "balanced") -> str:
        """Create message with configured defaults.

        Args:
            prompt: Text sent as a single user message.
            model_type: Alias in the `models` mapping; unknown aliases fall
                back to the "balanced" model.

        Returns:
            The text of the first content block of the reply.
        """
        client = self.create_client()
        models = self.config["models"]
        model = models.get(model_type, models["balanced"])
        message = client.messages.create(
            model=model,
            max_tokens=self.config["max_tokens"],
            messages=[
                {"role": "user", "content": prompt}
            ],
        )
        return message.content[0].text
# Usage
config = VertexConfig()
# Quick message with defaults
response = config.create_message("Explain machine learning", model_type="fast")
print(response)
# Create client for custom usage
client = config.create_client(region="us-west1")


# The import below opens the next example; it was fused onto the previous line.
from google.oauth2 import service_account
from google.auth import impersonated_credentials
from anthropic import AnthropicVertex
def create_impersonated_vertex_client(
    source_credentials_file: str,
    target_service_account: str,
    project_id: str,
    region: str
) -> AnthropicVertex:
    """Create Vertex AI client with service account impersonation.

    Args:
        source_credentials_file: Path to the key file of the service account
            that performs the impersonation.
        target_service_account: Email of the service account to impersonate.
        project_id: Google Cloud project for the client.
        region: Google Cloud region for the client.

    Returns:
        A client whose requests are authorised with the impersonated
        credentials.
    """
    # Load source credentials
    source_credentials = service_account.Credentials.from_service_account_file(
        source_credentials_file
    )
    # Create impersonated credentials
    target_credentials = impersonated_credentials.Credentials(
        source_credentials=source_credentials,
        target_principal=target_service_account,
        target_scopes=["https://www.googleapis.com/auth/cloud-platform"],
    )
    # Hand the impersonated credentials to the client. Without this argument
    # they are never used and the client authenticates with default credentials.
    return AnthropicVertex(
        project_id=project_id,
        region=region,
        credentials=target_credentials,
    )
# Usage
impersonated_client = create_impersonated_vertex_client(
    source_credentials_file="/path/to/source-credentials.json",
    target_service_account="target-sa@project.iam.gserviceaccount.com",
    project_id="your-project-id",
    region="us-central1",
)


# The import below opens the next example; it was fused onto the previous line.
import time
from typing import Dict, List
from datetime import datetime
class VertexCostMonitor:
    """Monitor and optimize Vertex AI usage costs.

    Keeps per-model request counts, estimated spend, token totals and mean
    request duration for calls made through `create_monitored_message`.
    """

    def __init__(self, project_id: str):
        self.project_id = project_id
        # model id -> {"requests", "total_cost", "total_tokens", "avg_duration"}
        self.usage_stats = {}
        self.model_costs = {
            # Example costs per 1K tokens (input/output)
            "claude-haiku-3-20241022": (0.00025, 0.00125),
            "claude-sonnet-4-20250514": (0.003, 0.015),
            "claude-opus-3-20240229": (0.015, 0.075),
        }

    def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
        """Select model based on cost-effectiveness.

        An explicit "maximum" requirement always selects the largest model.
        Otherwise "minimal", or any prompt shorter than 500 characters,
        selects the cheapest model; everything else gets the balanced model.
        """
        # The explicit request must be checked before the length heuristic,
        # otherwise short prompts could never reach the largest model.
        if quality_requirement == "maximum":
            return "claude-opus-3-20240229"
        if quality_requirement == "minimal" or len(prompt) < 500:
            return "claude-haiku-3-20241022"
        return "claude-sonnet-4-20250514"

    def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost for request.

        Returns 0.0 for models that have no entry in `self.model_costs`.
        """
        if model not in self.model_costs:
            return 0.0
        input_cost, output_cost = self.model_costs[model]
        return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)

    def create_monitored_message(self, client: "AnthropicVertex", prompt: str, **kwargs) -> tuple:
        """Create message with cost monitoring.

        Args:
            client: Object exposing `messages.create(...)`.
            prompt: Text sent as a single user message.
            **kwargs: Forwarded to `messages.create`, except for the optional
                `quality_requirement` key, which only steers model selection.

        Returns:
            A `(message, stats)` tuple where `stats` holds the model, the
            estimated cost, the duration in seconds, the token total and the
            cost per token for this one request.
        """
        request_options = dict(kwargs)
        quality_requirement = request_options.pop("quality_requirement", "balanced")

        # Select cost-effective model
        model = self.select_cost_effective_model(prompt, quality_requirement)

        # Create message; a monotonic clock keeps the duration non-negative
        # even if the system clock is adjusted during the request.
        start_time = time.perf_counter()
        message = client.messages.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **request_options
        )
        duration = time.perf_counter() - start_time

        # Track usage
        usage = message.usage
        total_tokens = usage.input_tokens + usage.output_tokens
        cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)

        # Update statistics
        stats = self.usage_stats.setdefault(model, {
            "requests": 0,
            "total_cost": 0.0,
            "total_tokens": 0,
            "avg_duration": 0.0,
        })
        stats["requests"] += 1
        stats["total_cost"] += cost
        stats["total_tokens"] += total_tokens
        # Running mean: fold the new duration into the previous average.
        stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]

        return message, {
            "model": model,
            "cost": cost,
            "duration": duration,
            "tokens": total_tokens,
            "cost_per_token": cost / total_tokens if total_tokens > 0 else 0,
        }

    def get_cost_summary(self) -> Dict[str, Any]:
        """Get cost usage summary across every model used so far."""
        total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
        total_requests = sum(stats["requests"] for stats in self.usage_stats.values())
        return {
            "total_cost": total_cost,
            "total_requests": total_requests,
            "avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
            "model_breakdown": self.usage_stats,
        }
# Usage
monitor = VertexCostMonitor("your-project-id")
client = AnthropicVertex(
    region="us-central1",
    project_id="your-project-id",
)
# `quality_requirement` steers model selection; `max_tokens` goes to the request.
message, stats = monitor.create_monitored_message(
    client,
    "Explain the benefits of cloud computing in detail",
    quality_requirement="balanced",
    max_tokens=500,
)
# Per-request figures
print(f"Model: {stats['model']}")
print(f"Cost: ${stats['cost']:.6f}")
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
print(f"Duration: {stats['duration']:.2f}s")
print(f"Response: {message.content[0].text[:100]}...")
# Get overall cost summary
summary = monitor.get_cost_summary()
print(f"\nCost Summary:")
print(f"Total cost: ${summary['total_cost']:.6f}")
print(f"Total requests: {summary['total_requests']}")
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")
# Streaming with Vertex AI
# Stream the reply through the existing `client`, printing text as it arrives.
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story about AI on Google Cloud"}
    ],
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)


class VertexDeployment:
    """Production deployment patterns for Vertex AI.

    Holds one client per named role ("primary", "fallback", "dev") and tries
    them in order until a request succeeds.
    """

    def __init__(self, project_id: str, environment: str = "production"):
        """Create the clients that apply to `environment`."""
        self.project_id = project_id
        self.environment = environment
        self.clients = self._create_clients()

    def _create_clients(self) -> Dict[str, AnthropicVertex]:
        """Create clients for different regions/purposes.

        In the "development" environment only the "dev" client is created;
        in every other environment all three are created.
        """
        configs = {
            "primary": {
                "region": "us-central1",
                "timeout": 60,
                "max_retries": 5,
            },
            "fallback": {
                "region": "us-west1",
                "timeout": 45,
                "max_retries": 3,
            },
            "dev": {
                "region": "us-east1",
                "timeout": 30,
                "max_retries": 2,
            },
        }
        clients = {}
        for name, config in configs.items():
            if self.environment == "development" and name != "dev":
                continue
            clients[name] = AnthropicVertex(
                project_id=self.project_id,
                region=config["region"],
                timeout=config["timeout"],
                max_retries=config["max_retries"],
            )
        return clients

    def create_message_with_fallback(self, prompt: str, **kwargs) -> Optional[str]:
        """Create message with automatic fallback.

        Args:
            prompt: Text sent as a single user message.
            **kwargs: Forwarded to `messages.create` (for example `model` and
                `max_tokens`).

        Returns:
            The text of the first content block from the first client that
            succeeds, or None when every client failed.
        """
        # Production tries primary then fallback; any other environment uses dev.
        client_order = ["primary", "fallback"] if self.environment == "production" else ["dev"]
        for client_name in client_order:
            if client_name not in self.clients:
                continue
            try:
                print(f"Trying {client_name} client...")
                message = self.clients[client_name].messages.create(
                    messages=[{"role": "user", "content": prompt}],
                    **kwargs
                )
                print(f"✅ Success with {client_name} client")
                return message.content[0].text
            except Exception as e:
                # Deliberately broad: any failure moves on to the next client.
                print(f"❌ {client_name} client failed: {e}")
                continue
        print("❌ All clients failed")
        return None
# Usage
deployment = VertexDeployment("your-project-id", "production")
# Extra keyword arguments are forwarded to the underlying request.
response = deployment.create_message_with_fallback(
    "Explain quantum computing",
    max_tokens=1024,
    model="claude-sonnet-4-20250514",
)
# None (or an empty reply) means every client failed or produced no text.
if response:
    print(f"Response: {response}")