CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-anthropic

The official Python library for the anthropic API

Pending
Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Security by Snyk

Pending

The risk profile of this skill

Overview
Eval results
Files

docs/vertex.md

Google Vertex AI Integration

Specialized client for accessing Claude models through Google Cloud Vertex AI, with Google Cloud authentication and Vertex-specific configurations. This integration allows you to use Claude models within your Google Cloud infrastructure.

Capabilities

Vertex AI Client Classes

Synchronous and asynchronous clients for Google Vertex AI integration with Claude models.

class AnthropicVertex:
    """Synchronous client for Claude models on Google Cloud Vertex AI.

    Interface sketch only — the implementation body is elided with ``...``.
    NOTE(review): ``Optional`` requires ``from typing import Optional``,
    which this snippet does not show — confirm it is in scope where used.
    """

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,  # GCP project; presumably falls back to env config — TODO confirm
        region: Optional[str] = None,  # Vertex AI region, e.g. "us-central1"
        **kwargs  # extra client options — presumably forwarded to the base client; confirm against SDK docs
    ): ...
    
    # API resource namespaces exposed by the client.
    messages: Messages
    completions: Completions

class AsyncAnthropicVertex:
    """Asynchronous client for Claude models on Google Cloud Vertex AI.

    Interface sketch only — the implementation body is elided with ``...``.
    Mirrors ``AnthropicVertex`` but exposes awaitable resource namespaces.
    """

    def __init__(
        self,
        *,
        project_id: Optional[str] = None,  # GCP project; presumably falls back to env config — TODO confirm
        region: Optional[str] = None,  # Vertex AI region, e.g. "us-central1"
        **kwargs  # extra client options — presumably forwarded to the base client; confirm against SDK docs
    ): ...
    
    # Async API resource namespaces exposed by the client.
    messages: AsyncMessages
    completions: AsyncCompletions

Usage Examples

Basic Vertex AI Setup

from anthropic import AnthropicVertex

# Basic configuration with project ID and region
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Using environment variables for configuration
import os
os.environ["GOOGLE_CLOUD_PROJECT"] = "your-project-id"
os.environ["GOOGLE_CLOUD_REGION"] = "us-central1"

# With no explicit arguments the client presumably reads the env vars
# set above — TODO confirm exact variable names against the SDK docs.
client = AnthropicVertex()

Google Cloud Authentication

import os
from google.oauth2 import service_account
from anthropic import AnthropicVertex

# Method 1: Service Account Key File
os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "/path/to/service-account-key.json"

client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 2: Service Account from Code
credentials = service_account.Credentials.from_service_account_file(
    "/path/to/service-account-key.json"
)

# Note: Direct credential passing may require additional configuration
# NOTE(review): `credentials` above is constructed but never passed to the
# client below — as written this client still uses default application
# credentials, not the key loaded in Method 2. Confirm intended wiring.
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Method 3: Default Application Credentials (recommended for GCP environments)
# This automatically uses credentials from:
# - Environment variable GOOGLE_APPLICATION_CREDENTIALS
# - gcloud CLI default credentials
# - Google Cloud metadata service (when running on GCP)
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

Messages with Vertex AI

# Create message using Vertex AI
message = client.messages.create(
    model="claude-sonnet-4-20250514",  # Vertex AI model name
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello from Google Vertex AI!"}
    ]
)

# Print the text of the first content block in the response.
print(message.content[0].text)

Vertex AI Model Selection

# Available Claude models on Vertex AI (example model names)
# NOTE(review): these IDs are illustrative; verify the actual Vertex AI
# model identifiers in the Google Cloud Model Garden before use.
VERTEX_MODELS = {
    "claude-sonnet-4": "claude-sonnet-4-20250514",
    "claude-haiku-3": "claude-haiku-3-20241022",
    "claude-opus-3": "claude-opus-3-20240229"
}

def create_vertex_message(model_name: str, prompt: str) -> str:
    """Send *prompt* to the Vertex AI model registered under *model_name*.

    Raises ValueError when *model_name* is not a key of VERTEX_MODELS.
    Relies on the module-level `client` and `VERTEX_MODELS` defined above.
    """
    if model_name not in VERTEX_MODELS:
        raise ValueError(f"Unknown model: {model_name}")

    response = client.messages.create(
        model=VERTEX_MODELS[model_name],
        max_tokens=1024,
        messages=[{"role": "user", "content": prompt}],
    )
    return response.content[0].text

# Usage
# (assumes `client` and VERTEX_MODELS from the snippets above are in scope)
response = create_vertex_message("claude-sonnet-4", "What is Google Vertex AI?")
print(response)

Multi-Region Vertex AI

class VertexMultiRegion:
    """Manage Vertex AI clients across multiple Google Cloud regions."""

    def __init__(self, project_id: str, regions: List[str]):
        self.project_id = project_id
        # One client per configured region, keyed by region name.
        self.clients = {
            r: AnthropicVertex(project_id=project_id, region=r)
            for r in regions
        }

    def create_message(self, region: str, **kwargs) -> Any:
        """Create a message using the client configured for *region*."""
        if region not in self.clients:
            raise ValueError(f"Region {region} not configured")
        return self.clients[region].messages.create(**kwargs)

    def find_best_region(self, model: str) -> str:
        """Pick a preferred region for *model* (simplified example)."""
        # In practice, you'd check model availability per region.
        region_preferences = {
            "claude-opus": "us-central1",    # Largest models in central region
            "claude-sonnet": "us-west1",     # Balanced models in west
            "claude-haiku": "us-east1"       # Fast models in east
        }

        lowered = model.lower()
        for family, preferred in region_preferences.items():
            if family in lowered:
                if preferred in self.clients:
                    return preferred
                return list(self.clients)[0]

        # No family matched — default to the first configured region.
        return list(self.clients)[0]

# Usage
multi_region = VertexMultiRegion(
    project_id="your-project-id",
    regions=["us-central1", "us-west1", "us-east1"]
)

# Pick a region heuristically for the model family, then send there.
best_region = multi_region.find_best_region("claude-sonnet-4")
message = multi_region.create_message(
    region=best_region,
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Hello from multi-region!"}]
)

Async Vertex AI Usage

import asyncio
from anthropic import AsyncAnthropicVertex

async def vertex_async_example():
    """Create an async Vertex AI client and send a single message."""
    vertex = AsyncAnthropicVertex(
        project_id="your-project-id",
        region="us-central1"
    )

    # Awaiting the call lets other tasks run while the request is in flight.
    reply = await vertex.messages.create(
        model="claude-sonnet-4-20250514",
        max_tokens=1024,
        messages=[{"role": "user", "content": "Async Vertex AI request"}]
    )

    return reply.content[0].text

# Run async
# asyncio.run creates a fresh event loop, runs the coroutine, and closes it.
result = asyncio.run(vertex_async_example())
print(f"Async Vertex AI result: {result}")

Vertex AI Error Handling

from google.api_core import exceptions as gcp_exceptions
from anthropic import AnthropicVertex, APIError

def robust_vertex_request(prompt: str, max_retries: int = 3) -> "Optional[str]":
    """Make a Vertex AI request with robust error handling.

    Retries transient failures (quota, timeout, service unavailable) with
    backoff; returns None on any unrecoverable failure or once retries are
    exhausted.

    Args:
        prompt: User prompt to send.
        max_retries: Maximum number of attempts for transient failures.

    Returns:
        The response text, or None if the request ultimately failed.

    Note: the return annotation is a forward-reference string because
    `Optional` is not imported by this snippet.
    """
    import time  # local import keeps this snippet self-contained

    # Create the client once — it is reusable across retry attempts.
    client = AnthropicVertex(
        project_id="your-project-id",
        region="us-central1"
    )

    for attempt in range(max_retries):
        try:
            message = client.messages.create(
                model="claude-sonnet-4-20250514",
                max_tokens=1024,
                messages=[
                    {"role": "user", "content": prompt}
                ]
            )

            return message.content[0].text

        except gcp_exceptions.Unauthenticated:
            print("❌ Google Cloud authentication failed")
            print("Check GOOGLE_APPLICATION_CREDENTIALS or run 'gcloud auth login'")
            return None

        except gcp_exceptions.PermissionDenied:
            print("❌ Permission denied for Vertex AI")
            print("Check IAM permissions for Vertex AI access")
            return None

        except gcp_exceptions.ResourceExhausted:
            print(f"⏳ Quota exceeded (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # exponential backoff
                continue
            print("❌ Quota exceeded. Max retries reached.")
            return None

        except gcp_exceptions.DeadlineExceeded:
            print(f"⏰ Request timeout (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                continue
            print("❌ Request timeout. Max retries reached.")
            return None

        except gcp_exceptions.ServiceUnavailable:
            print(f"🔥 Service unavailable (attempt {attempt + 1})")
            if attempt < max_retries - 1:
                time.sleep(5)
                continue
            print("❌ Service unavailable. Max retries reached.")
            return None

        except gcp_exceptions.NotFound:
            print("❌ Model or resource not found")
            print("Check model name and project configuration")
            return None

        except APIError as e:
            print(f"❌ Anthropic API error: {e}")
            return None

        except Exception as e:
            print(f"❌ Unexpected error: {e}")
            return None

    print("❌ Max retries reached")
    return None

# Usage
result = robust_vertex_request("What are the benefits of using Google Vertex AI?")
# `result` is None when retries were exhausted or an unrecoverable error occurred.
if result:
    print(f"Success: {result}")

Vertex AI Configuration Management

import json
from google.cloud import aiplatform
from typing import Dict, Any

class VertexConfig:
    """Configuration management for Vertex AI deployment.

    Loads settings from a JSON file, falling back to built-in defaults
    when the file does not exist.
    """

    def __init__(self, config_file: str = "vertex-config.json"):
        self.config_file = config_file
        self.config = self.load_config()

    def load_config(self) -> Dict[str, Any]:
        """Load configuration from file, or defaults if the file is missing.

        Deliberately handles only FileNotFoundError — malformed JSON still
        raises, so a broken config file is not silently ignored.
        """
        try:
            with open(self.config_file, 'r') as f:
                return json.load(f)
        except FileNotFoundError:
            return self.default_config()

    def default_config(self) -> Dict[str, Any]:
        """Default configuration used when no config file is present."""
        return {
            "project_id": "your-project-id",
            "regions": ["us-central1", "us-west1"],
            "models": {
                "fast": "claude-haiku-3-20241022",
                "balanced": "claude-sonnet-4-20250514",
                "powerful": "claude-opus-3-20240229"
            },
            "max_tokens": 1024,
            "timeout": 30,
            "max_retries": 3
        }

    def create_client(self, region: "str | None" = None) -> "AnthropicVertex":
        """Create a configured Vertex AI client.

        Args:
            region: Region to use; defaults to the first configured region.

        Note: annotations are forward-reference strings — the original
        `region: str = None` annotation was wrong (None is a valid value),
        and `AnthropicVertex` is not imported by this snippet.
        """
        region = region or self.config["regions"][0]

        return AnthropicVertex(
            project_id=self.config["project_id"],
            region=region,
            timeout=self.config["timeout"],
            max_retries=self.config["max_retries"]
        )

    def create_message(self, prompt: str, model_type: str = "balanced") -> str:
        """Create a message with configured defaults.

        Falls back to the "balanced" model when *model_type* is unknown.
        """
        client = self.create_client()
        model = self.config["models"].get(model_type, self.config["models"]["balanced"])

        message = client.messages.create(
            model=model,
            max_tokens=self.config["max_tokens"],
            messages=[
                {"role": "user", "content": prompt}
            ]
        )

        return message.content[0].text

# Usage
# Loads vertex-config.json if present; otherwise uses built-in defaults.
config = VertexConfig()

# Quick message with defaults
response = config.create_message("Explain machine learning", model_type="fast")
print(response)

# Create client for custom usage
client = config.create_client(region="us-west1")

Vertex AI with Service Account Impersonation

from google.oauth2 import service_account
from google.auth import impersonated_credentials
from anthropic import AnthropicVertex

def create_impersonated_vertex_client(
    source_credentials_file: str,
    target_service_account: str,
    project_id: str,
    region: str
) -> AnthropicVertex:
    """Build a Vertex AI client using service-account impersonation (conceptual)."""
    # Load the source identity from its key file.
    base_credentials = service_account.Credentials.from_service_account_file(
        source_credentials_file
    )

    # Derive credentials that impersonate the target service account.
    impersonated = impersonated_credentials.Credentials(
        source_credentials=base_credentials,
        target_principal=target_service_account,
        target_scopes=["https://www.googleapis.com/auth/cloud-platform"]
    )

    # Note: Direct credential passing may require additional setup.
    # This is a conceptual example — the impersonated credentials built
    # above are not wired into the client returned here.
    return AnthropicVertex(
        project_id=project_id,
        region=region
    )

# Usage
# Conceptual example — see the note in the function body about how the
# impersonated credentials are (not yet) wired into the client.
impersonated_client = create_impersonated_vertex_client(
    source_credentials_file="/path/to/source-credentials.json",
    target_service_account="target-sa@project.iam.gserviceaccount.com",
    project_id="your-project-id",
    region="us-central1"
)

Vertex AI Cost Monitoring

import time
from typing import Dict, List
from datetime import datetime

class VertexCostMonitor:
    """Monitor and optimize Vertex AI usage costs.

    Tracks per-model request counts, token totals, estimated spend and a
    running mean request duration.
    """

    def __init__(self, project_id: str):
        self.project_id = project_id
        self.usage_stats = {}  # per-model running statistics
        self.model_costs = {
            # Example costs per 1K tokens (input/output)
            "claude-haiku-3-20241022": (0.00025, 0.00125),
            "claude-sonnet-4-20250514": (0.003, 0.015),
            "claude-opus-3-20240229": (0.015, 0.075)
        }

    def select_cost_effective_model(self, prompt: str, quality_requirement: str = "balanced") -> str:
        """Select model based on cost-effectiveness.

        Short prompts (< 500 chars) or "minimal" quality use the cheapest
        model; "maximum" quality uses the most capable one.
        """
        if quality_requirement == "minimal" or len(prompt) < 500:
            return "claude-haiku-3-20241022"
        elif quality_requirement == "maximum":
            return "claude-opus-3-20240229"
        else:
            return "claude-sonnet-4-20250514"

    def estimate_cost(self, model: str, input_tokens: int, output_tokens: int) -> float:
        """Estimate cost (USD) for a request; 0.0 for unknown models."""
        if model not in self.model_costs:
            return 0.0

        input_cost, output_cost = self.model_costs[model]
        return (input_tokens * input_cost / 1000) + (output_tokens * output_cost / 1000)

    def create_monitored_message(self, client: "AnthropicVertex", prompt: str, **kwargs) -> tuple:
        """Create a message with cost monitoring.

        The `client` annotation is a forward-reference string so this class
        can be defined before `AnthropicVertex` is imported (this snippet's
        imports do not include it).

        Returns:
            (message, stats) — stats holds model, cost, duration, tokens,
            and cost_per_token for this single request.
        """
        # Select cost-effective model for the prompt.
        model = self.select_cost_effective_model(
            prompt,
            kwargs.get("quality_requirement", "balanced")
        )

        # Create the message, timing the round trip.
        start_time = time.time()
        message = client.messages.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            # quality_requirement is our own knob, not an API parameter
            **{k: v for k, v in kwargs.items() if k != "quality_requirement"}
        )
        duration = time.time() - start_time

        # Track usage and estimated spend.
        usage = message.usage
        cost = self.estimate_cost(model, usage.input_tokens, usage.output_tokens)
        total_tokens = usage.input_tokens + usage.output_tokens

        stats = self.usage_stats.setdefault(model, {
            "requests": 0,
            "total_cost": 0.0,
            "total_tokens": 0,
            "avg_duration": 0.0
        })
        stats["requests"] += 1
        stats["total_cost"] += cost
        stats["total_tokens"] += total_tokens
        # Incremental running mean of request duration.
        stats["avg_duration"] = (stats["avg_duration"] * (stats["requests"] - 1) + duration) / stats["requests"]

        return message, {
            "model": model,
            "cost": cost,
            "duration": duration,
            "tokens": total_tokens,
            "cost_per_token": cost / total_tokens if total_tokens > 0 else 0
        }

    def get_cost_summary(self) -> Dict[str, Any]:
        """Get aggregate cost usage summary across all models."""
        total_cost = sum(stats["total_cost"] for stats in self.usage_stats.values())
        total_requests = sum(stats["requests"] for stats in self.usage_stats.values())

        return {
            "total_cost": total_cost,
            "total_requests": total_requests,
            "avg_cost_per_request": total_cost / total_requests if total_requests > 0 else 0,
            "model_breakdown": self.usage_stats
        }

# Usage
monitor = VertexCostMonitor("your-project-id")
client = AnthropicVertex(
    project_id="your-project-id",
    region="us-central1"
)

# Returns the API message plus a stats dict for this single request.
message, stats = monitor.create_monitored_message(
    client,
    "Explain the benefits of cloud computing in detail",
    max_tokens=500,
    quality_requirement="balanced"
)

print(f"Model: {stats['model']}")
print(f"Cost: ${stats['cost']:.6f}")
print(f"Cost per token: ${stats['cost_per_token']:.8f}")
print(f"Duration: {stats['duration']:.2f}s")
print(f"Response: {message.content[0].text[:100]}...")

# Get overall cost summary
summary = monitor.get_cost_summary()
print(f"\nCost Summary:")
print(f"Total cost: ${summary['total_cost']:.6f}")
print(f"Total requests: {summary['total_requests']}")
print(f"Average cost per request: ${summary['avg_cost_per_request']:.6f}")

Vertex AI with Streaming

# Streaming with Vertex AI
# The context manager finalizes the stream on exit; text_stream yields
# incremental text deltas as they arrive.
with client.messages.stream(
    model="claude-sonnet-4-20250514",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Write a story about AI on Google Cloud"}
    ]
) as stream:
    for text in stream.text_stream:
        print(text, end="", flush=True)

Vertex AI Deployment Patterns

class VertexDeployment:
    """Production deployment patterns for Vertex AI.

    Maintains regional clients ("primary", "fallback", "dev") and provides
    message creation with automatic regional fallback.
    """

    def __init__(self, project_id: str, environment: str = "production"):
        self.project_id = project_id
        self.environment = environment
        self.clients = self._create_clients()

    def _create_clients(self) -> "Dict[str, AnthropicVertex]":
        """Create clients for different regions/purposes.

        In the "development" environment only the "dev" client is built;
        otherwise all configured clients are.

        Note: annotations here and below are forward-reference strings —
        `Optional` in particular is never imported in this document, so a
        bare annotation would raise NameError when the class is defined.
        """
        configs = {
            "primary": {
                "region": "us-central1",
                "timeout": 60,
                "max_retries": 5
            },
            "fallback": {
                "region": "us-west1",
                "timeout": 45,
                "max_retries": 3
            },
            "dev": {
                "region": "us-east1",
                "timeout": 30,
                "max_retries": 2
            }
        }

        clients = {}
        for name, config in configs.items():
            # Development builds only the dev client to avoid idle connections.
            if self.environment == "development" and name != "dev":
                continue

            clients[name] = AnthropicVertex(
                project_id=self.project_id,
                region=config["region"],
                timeout=config["timeout"],
                max_retries=config["max_retries"]
            )

        return clients

    def create_message_with_fallback(self, prompt: str, **kwargs) -> "Optional[str]":
        """Create message with automatic fallback.

        Tries "primary" then "fallback" in production (just "dev" otherwise);
        returns the first successful response text, or None if all fail.
        """
        client_order = ["primary", "fallback"] if self.environment == "production" else ["dev"]

        for client_name in client_order:
            if client_name not in self.clients:
                continue

            try:
                print(f"Trying {client_name} client...")
                message = self.clients[client_name].messages.create(
                    messages=[{"role": "user", "content": prompt}],
                    **kwargs
                )
                print(f"✅ Success with {client_name} client")
                return message.content[0].text

            except Exception as e:
                # Broad catch is deliberate: any failure should trigger fallback.
                print(f"❌ {client_name} client failed: {e}")
                continue

        print("❌ All clients failed")
        return None

# Usage
deployment = VertexDeployment("your-project-id", "production")

# Tries the primary region first, then the fallback region on failure.
response = deployment.create_message_with_fallback(
    "Explain quantum computing",
    model="claude-sonnet-4-20250514",
    max_tokens=1024
)

if response:
    print(f"Response: {response}")

docs

batching.md

bedrock.md

beta.md

completions.md

configuration.md

errors.md

index.md

messages.md

models.md

streaming.md

tools.md

vertex.md

tile.json