The official Python library for the Anthropic API

Review status: pending — it has not yet been assessed whether this skill follows best practices.
Impact assessment: pending — no eval scenarios have been run.
Risk profile: pending.
Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.
Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.
class AnthropicVertex:
    """Synchronous client for Claude models on Google Cloud Vertex AI.

    Interface stub: construction accepts an optional GCP project id and
    region (keyword-only) plus pass-through keyword arguments.
    """

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    # Resource namespaces exposed by the client.
    messages: Messages
    completions: Completions
class AsyncAnthropicVertex:
    """Asynchronous client for Claude models on Google Cloud Vertex AI.

    Interface stub: mirrors ``AnthropicVertex`` but exposes awaitable
    resource namespaces.
    """

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,
        region: Optional[str] = None,
        **kwargs
    ): ...

    # Async resource namespaces exposed by the client.
    messages: AsyncMessages
    completions: AsyncCompletions


from anthropic import AnthropicVertex
# Explicit configuration: pass the project ID and region directly.
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Alternative: supply the same settings through environment variables.
import os

os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"
client = AnthropicVertex()

# Imports for the authentication examples that follow.
import os
from google.oauth2 import service_account
from anthropic import AnthropicVertex
# Method 1: point GOOGLE_APPLICATION_CREDENTIALS at a service-account key file.
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 2: load a service-account credential object in code.
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json"
)
# NOTE(review): `credentials` is never handed to the client below — direct
# credential passing may require additional configuration.
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 3: Application Default Credentials (recommended on GCP).
# These are resolved automatically from:
#   - the GOOGLE_APPLICATION_CREDENTIALS environment variable
#   - gcloud CLI default credentials
#   - the Google Cloud metadata service (when running on GCP)
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)
# Create message using Vertex AI
# Request a single completion through the Vertex AI client.
message = client.messages.create(
    model="claude-sonnet-4-20250514",  # Vertex AI model name
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello from Google Vertex AI!"}
    ]
)
# The reply's first content block holds the text.
print(message.content[0].text)
# Available Claude models on Vertex AI (example model names)
# Example Claude model identifiers available on Vertex AI, keyed by alias.
VERTEX_MODELS = {
    "claude-sonnet-4": "claude-sonnet-4-20250514",
    "claude-haiku-3": "claude-haiku-3-20241022",
    "claude-opus-3": "claude-opus-3-20240229"
}


def create_vertex_message(model_name: str, prompt: str, max_tokens: int = 1024) -> str:
    """Send *prompt* to the aliased Vertex AI model and return the reply text.

    Args:
        model_name: An alias key from ``VERTEX_MODELS``.
        prompt: The user message to send.
        max_tokens: Response token budget. Defaults to 1024, matching the
            previously hard-coded value (backward compatible).

    Returns:
        The text of the first content block of the model's reply.

    Raises:
        ValueError: If *model_name* is not a known alias.
    """
    if model_name not in VERTEX_MODELS:
        raise ValueError(f"Unknown model: {model_name}")
    model_id = VERTEX_MODELS[model_name]
    # NOTE(review): relies on a module-level `client` created earlier in
    # this file.
    message = client.messages.create(
        model=model_id,
        max_tokens=max_tokens,
        messages=[
            {"role": "user", "content": prompt}
        ]
    )
    return message.content[0].text
# Usage
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
print(response)


class VertexMultiRegion:
    """Manage Vertex AI clients across multiple Google Cloud regions."""

    def __init__(self, project_id: str, regions: List[str]):
        self.project_id = project_id
        # One client per requested region.
        self.clients = {
            region: AnthropicVertex(project_id=project_id, region=region)
            for region in regions
        }

    def create_message(self, region: str, **kwargs) -> Any:
        """Create a message using the client pinned to *region*."""
        if region not in self.clients:
            raise ValueError(f"Region {region} not configured")
        return self.clients[region].messages.create(**kwargs)

    def find_best_region(self, model: str) -> str:
        """Find best region for a model (simplified example)."""
        # In practice, you'd check model availability per region.
        region_preferences = {
            "claude-opus": "us-central1",  # Largest models in central region
            "claude-sonnet": "us-west1",  # Balanced models in west
            "claude-haiku": "us-east1"  # Fast models in east
        }
        model_key = model.lower()
        for model_type, preferred_region in region_preferences.items():
            if model_type not in model_key:
                continue
            if preferred_region in self.clients:
                return preferred_region
            return list(self.clients.keys())[0]
        return list(self.clients.keys())[0]  # Default to first region
# Usage: provision clients in three regions, then route by model family.
multi_region = VertexMultiRegion(
    project_id="your-project-id",
    regions=["us-central1", "us-west1", "us-east1"]
)
best_region = multi_region.find_best_region("claude-sonnet-4")
message = multi_region.create_message(
    region=best_region,
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from multi-region!"}]
)

import asyncio
from anthropic import AsyncAnthropicVertex
async def vertex_async_example():
    """Create one message via the async Vertex client and return its text."""
    async_client = AsyncAnthropicVertex(
        project_id="your-project-id",
        region="us-central1"
    )
    # Awaitable counterpart of the synchronous messages.create call.
    message = await async_client.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[
            {"role": "user", "content": "Async Vertex AI request"}
        ]
    )
    return message.content[0].text


# Drive the coroutine to completion from synchronous code.
result = asyncio.run(vertex_async_example())
print(f"Async Vertex AI result: {result}")

from google.api_core import exceptions as gcp_exceptions
from anthropic import AnthropicVertex, APIError
def robust_vertex_request(prompt: str, max_retries: int = 3) -> Optional[str]:
    """Make a Vertex AI request with retry and GCP-aware error handling.

    Retries quota, timeout, and availability failures up to *max_retries*
    attempts (exponential backoff for quota errors, fixed wait for service
    unavailability); all other failures return immediately.

    Args:
        prompt: User message to send.
        max_retries: Maximum number of attempts for retryable failures.

    Returns:
        The reply text, or ``None`` on any unrecoverable error or once
        retries are exhausted.
    """
    # BUG FIX: `time` is used for backoff below but was never imported
    # before this point in the file; import it locally so the function is
    # self-contained.
    import time

    for attempt in range(max_retries):
        try:
            client = AnthropicVertex(
                project_id="your-project-id",
                region="us-central1"
            )
            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )
            return message.content[0].text
        except gcp_exceptions.Unauthenticated:
            # Not retryable: credentials are missing or invalid.
            print("❌ Google Cloud authentication failed")
            print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
            return None
        except gcp_exceptions.PermissionDenied:
            # Not retryable: the caller lacks IAM permissions.
            print("❌ Permission denied for Vertex AI")
            print("Check IAM permissions for Vertex AI access")
            return None
        except gcp_exceptions.ResourceExhausted:
            # Retryable: exponential backoff on quota exhaustion.
            print(f"⏳ Quota exceeded (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)
                continue
            print("❌ Quota exceeded. Max retries reached.")
            return None
        except gcp_exceptions.DeadlineExceeded:
            # Retryable: retry immediately on timeout.
            print(f"⏰ Request timeout (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                continue
            print("❌ Request timeout. Max retries reached.")
            return None
        except gcp_exceptions.ServiceUnavailable:
            # Retryable: fixed wait before the next attempt.
            print(f"🔥 Service unavailable (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(5)
                continue
            print("❌ Service unavailable. Max retries reached.")
            return None
        except gcp_exceptions.NotFound:
            print("❌ Model or resource not found")
            print("Check model name and project configuration")
            return None
        except APIError as e:
            print(f"❌ Anthropic API error: {e}")
            return None
        except Exception as e:
            # Last-resort guard so one bad request never crashes the caller.
            print(f"❌ Unexpected error: {e}")
            return None
    print("❌ Max retries reached")
    return None
# Usage: a falsy result signals that every attempt failed.
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
if result:
    print(f"Success: {result}")

import json
from google.cloud import aiplatform
from typing import Dict, Any
class VertexConfig:
    """Configuration management for Vertex AI deployment.

    Loads settings from a JSON file, falling back to built-in defaults
    when the file does not exist.
    """

    def __init__(self, config_file: str = "vertex-config.json"):
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from *self.config_file*; defaults if absent.

        Note: a present-but-malformed file still raises, which is
        deliberate — silent fallback would mask a broken config.
        """
        try:
            with open(self.config_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return self.default_config()

    def default_config(self) -> Dict[str, Any]:
        """Built-in default configuration."""
        return {
            "project_id": "your-project-id",
            "regions": ["us-central1", "us-west1"],
            "models": {
                "fast": "claude-haiku-3-20241022",
                "balanced": "claude-sonnet-4-20250514",
                "powerful": "claude-opus-3-20240229"
            },
            "max_tokens": 1024,
            "timeout": 30,
            "max_retries": 3
        }

    def create_client(self, region: "Optional[str]" = None) -> "AnthropicVertex":
        """Create a configured Vertex AI client.

        BUG FIX: `region` defaulted to ``None`` but was annotated plain
        ``str``; it is now optional. Annotations are quoted (forward refs)
        so the class is definable even before ``AnthropicVertex`` /
        ``Optional`` are in scope.

        Args:
            region: Target region; defaults to the first configured region.
        """
        region = region or self.config["regions"][0]
        return AnthropicVertex(
            project_id=self.config["project_id"],
            region=region,
            timeout=self.config["timeout"],
            max_retries=self.config["max_retries"]
        )

    def create_message(self, prompt: str, model_type: str = "balanced") -> str:
        """Send *prompt* using the model registered for *model_type*.

        Unknown model types silently fall back to the "balanced" model.
        """
        client = self.create_client()
        model = self.config["models"].get(model_type, self.config["models"]["balanced"])
        message = client.messages.create(
            model=model,
            max_tokens=self.config["max_tokens"],
            messages=[
                {"role": "user", "content": prompt}
            ]
        )
        return message.content[0].text
# Usage
config = VertexConfig()

# One-call message with configured defaults.
response = config.create_message("Explain machine learning", model_type="fast")
print(response)

# Or build a client directly for custom calls.
client = config.create_client(region="us-west1")

from google.oauth2 import service_account
from google.auth import impersonated_credentials
from anthropic import AnthropicVertex
def create_impersonated_vertex_client(
    source_credentials_file: str,
    target_service_account: str,
    project_id: str,
    region: str
) -> AnthropicVertex:
    """Build a Vertex AI client authorized via service-account impersonation.

    Loads credentials from *source_credentials_file*, derives impersonated
    credentials for *target_service_account*, then constructs a client for
    the given project and region.
    """
    # Credentials that are allowed to perform the impersonation.
    src_creds = service_account.Credentials.from_service_account_file(
        source_credentials_file
    )
    # Short-lived credentials acting as the target service account.
    impersonated = impersonated_credentials.Credentials(
        source_credentials=src_creds,
        target_principal=target_service_account,
        target_scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )
    # NOTE(review): `impersonated` is never passed to the client below —
    # direct credential passing may require additional setup; this is a
    # conceptual example.
    return AnthropicVertex(
        project_id=project_id,
        region=region
    )
# Usage
impersonated_client = create_impersonated_vertex_client(
    source_credentials_file="/path/to/source-credentials.json",
    target_service_account="target-sa@project.iam.gserviceaccount.com",
    project_id="your-project-id",
    region="us-central1"
)

import time
from typing import Dict, List
from datetime import datetime
class VertexCostMonitor:
    """Monitor and optimize Vertex AI usage costs.

    Tracks per-model request counts, token totals, estimated spend, and
    average latency, and picks a model tier for each request.
    """

    def __init__(self, project_id: str):
        self.project_id = project_id
        # Per-model usage statistics, initialized lazily per model id.
        self.usage_stats = {}
        self.model_costs = {
            # Example costs per 1K tokens (input/output)
            "claude-haiku-3-20241022": (0.00025, 0.00125),
            "claude-sonnet-4-20250514": (0.003, 0.015),
            "claude-opus-3-20240229": (0.015, 0.075)
        }

    def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
        """Select model based on cost-effectiveness.

        BUG FIX: an explicit ``quality_requirement="maximum"`` previously
        lost to the short-prompt shortcut and got the cheapest model; the
        explicit quality request is now honored first.
        """
        if quality_requirement == "maximum":
            return "claude-opus-3-20240229"
        if quality_requirement == "minimal" or len(prompt) < 500:
            return "claude-haiku-3-20241022"
        return "claude-sonnet-4-20250514"

    def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Estimate request cost in dollars; unknown models cost 0.0."""
        if model not in self.model_costs:
            return 0.0
        input_cost, output_cost = self.model_costs[model]
        return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)

    def create_monitored_message(self, client: "AnthropicVertex", prompt: str, **kwargs) -> tuple:
        """Create a message while recording cost and latency statistics.

        Returns a ``(message, stats)`` tuple; *stats* records the chosen
        model, estimated cost, wall-clock duration, and token counts.
        (The ``client`` annotation is quoted so this class can be defined
        independently of the anthropic import.)
        """
        # Pick a model tier; "quality_requirement" is consumed here and
        # must not be forwarded to the API call below.
        model = self.select_cost_effective_model(
            prompt,
            kwargs.get("quality_requirement", "balanced")
        )
        start_time = time.time()
        message = client.messages.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            **{k: v for k, v in kwargs.items() if k != "quality_requirement"}
        )
        duration = time.time() - start_time
        usage = message.usage
        total_tokens = usage.input_tokens + usage.output_tokens
        cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)
        # Lazily initialize per-model stats, then fold in this request.
        if model not in self.usage_stats:
            self.usage_stats[model] = {
                "requests": 0,
                "total_cost": 0.0,
                "total_tokens": 0,
                "avg_duration": 0.0
            }
        stats = self.usage_stats[model]
        stats["requests"] += 1
        stats["total_cost"] += cost
        stats["total_tokens"] += total_tokens
        # Incremental running mean of request duration.
        stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]
        return message, {
            "model": model,
            "cost": cost,
            "duration": duration,
            "tokens": total_tokens,
            "cost_per_token": cost / total_tokens if total_tokens > 0 else 0
        }

    def get_cost_summary(self) -> Dict[str, Any]:
        """Aggregate cost summary across every model used so far."""
        total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
        total_requests = sum(stats["requests"] for stats in self.usage_stats.values())
        return {
            "total_cost": total_cost,
            "total_requests": total_requests,
            "avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
            "model_breakdown": self.usage_stats
        }
# Usage: route one request through the monitor and inspect its stats.
monitor = VertexCostMonitor("your-project-id")
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)
message, stats = monitor.create_monitored_message(
    client,
    "Explain the benefits of cloud computing in detail",
    max_tokens=500,
    quality_requirement="balanced"
)
print(f"Model: {stats['model']}")
print(f"Cost: ${stats['cost']:.6f}")
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
print(f"Duration: {stats['duration']:.2f}s")
print(f"Response: {message.content[0].text[:100]}...")

# Aggregate view across all monitored requests.
summary = monitor.get_cost_summary()
print(f"\nCost Summary:")
print(f"Total cost: ${summary['total_cost']:.6f}")
print(f"Total requests: {summary['total_requests']}")
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")
# Streaming with Vertex AI
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story about AI on Google Cloud"}
    ]
) as stream:
    # Emit text deltas as soon as they arrive.
    for text in stream.text_stream:
        print(text, end="", flush=True)


class VertexDeployment:
    """Production deployment patterns for Vertex AI.

    Provisions one client per role (primary/fallback/dev) and sends
    requests with automatic fallback between them.
    """

    def __init__(self, project_id: str, environment: str = "production"):
        self.project_id = project_id
        self.environment = environment
        self.clients = self._create_clients()

    def _create_clients(self) -> Dict[str, AnthropicVertex]:
        """Create clients for different regions/purposes."""
        configs = {
            "primary": {"region": "us-central1", "timeout": 60, "max_retries": 5},
            "fallback": {"region": "us-west1", "timeout": 45, "max_retries": 3},
            "dev": {"region": "us-east1", "timeout": 30, "max_retries": 2}
        }
        clients = {}
        for name, cfg in configs.items():
            # A development environment provisions only the "dev" client.
            if self.environment == "development" and name != "dev":
                continue
            clients[name] = AnthropicVertex(
                project_id=self.project_id,
                region=cfg["region"],
                timeout=cfg["timeout"],
                max_retries=cfg["max_retries"]
            )
        return clients

    def create_message_with_fallback(self, prompt: str, **kwargs) -> Optional[str]:
        """Create a message, falling back across clients on failure."""
        if self.environment == "production":
            client_order = ["primary", "fallback"]
        else:
            client_order = ["dev"]
        for client_name in client_order:
            if client_name not in self.clients:
                continue
            try:
                print(f"Trying {client_name} client...")
                message = self.clients[client_name].messages.create(
                    messages=[{"role": "user", "content": prompt}],
                    **kwargs
                )
                print(f"✅ Success with {client_name} client")
                return message.content[0].text
            except Exception as e:
                print(f"❌ {client_name} client failed: {e}")
                continue
        print("❌ All clients failed")
        return None
# Usage: production deployment tries primary, then fallback.
deployment = VertexDeployment("your-project-id", "production")
response = deployment.create_message_with_fallback(
    "Explain quantum computing",
    model="claude-sonnet-4-20250514",
    max_tokens=1024
)
if response:
    print(f"Response: {response}")