The official Python library for the Cerebras API
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Client initialization, configuration, and authentication for both synchronous and asynchronous usage patterns. The SDK provides flexible configuration options including environment variable support, custom timeouts, retry policies, and HTTP client customization.
The primary client class for synchronous API interactions with comprehensive configuration options and automatic API key detection from environment variables.
class Cerebras:
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
http_client: httpx.Client | None = None,
_strict_response_validation: bool = False,
warm_tcp_connection: bool = True,
) -> None:
"""
Construct a new synchronous Cerebras client instance.
This automatically infers the api_key argument from the CEREBRAS_API_KEY
environment variable if it is not provided.
Parameters:
- api_key: API key for authentication (from CEREBRAS_API_KEY env if None)
- base_url: Override the default base URL for the API
- timeout: Request timeout configuration (float, Timeout object, or NOT_GIVEN)
- max_retries: Maximum number of retries for failed requests
- default_headers: Default headers to include with all requests
- default_query: Default query parameters for all requests
- http_client: Custom httpx.Client instance (DefaultHttpxClient if None)
- _strict_response_validation: Enable strict API response validation
- warm_tcp_connection: Enable TCP connection warming for reduced latency
"""
# Resource properties
chat: chat.ChatResource
completions: completions.CompletionsResource
models: models.ModelsResource
# Response wrapper properties
with_raw_response: CerebrasWithRawResponse
with_streaming_response: CerebrasWithStreamedResponse
# Client configuration
api_key: str

The async client class providing identical functionality to the synchronous client but with async/await support for non-blocking operations.
class AsyncCerebras:
def __init__(
self,
*,
api_key: str | None = None,
base_url: str | httpx.URL | None = None,
timeout: Union[float, Timeout, None, NotGiven] = NOT_GIVEN,
max_retries: int = DEFAULT_MAX_RETRIES,
default_headers: Mapping[str, str] | None = None,
default_query: Mapping[str, object] | None = None,
http_client: httpx.AsyncClient | None = None,
_strict_response_validation: bool = False,
warm_tcp_connection: bool = True,
) -> None:
"""
Construct a new asynchronous Cerebras client instance.
This automatically infers the api_key argument from the CEREBRAS_API_KEY
environment variable if it is not provided.
Parameters:
- api_key: API key for authentication (from CEREBRAS_API_KEY env if None)
- base_url: Override the default base URL for the API
- timeout: Request timeout configuration (float, Timeout object, or NOT_GIVEN)
- max_retries: Maximum number of retries for failed requests
- default_headers: Default headers to include with all requests
- default_query: Default query parameters for all requests
- http_client: Custom httpx.AsyncClient instance (DefaultAsyncHttpxClient if None)
- _strict_response_validation: Enable strict API response validation
- warm_tcp_connection: Enable TCP connection warming for reduced latency
"""
# Resource properties
chat: chat.AsyncChatResource
completions: completions.AsyncCompletionsResource
models: models.AsyncModelsResource
# Response wrapper properties
with_raw_response: AsyncCerebrasWithRawResponse
with_streaming_response: AsyncCerebrasWithStreamedResponse
# Client configuration
api_key: str

Convenience aliases for the main client classes to provide alternative naming options.
Client = Cerebras
AsyncClient = AsyncCerebras

Classes that provide access to raw HTTP responses and streaming responses, useful for advanced use cases requiring direct access to response metadata.
class CerebrasWithRawResponse:
"""Wrapper providing access to raw HTTP responses."""
class AsyncCerebrasWithRawResponse:
"""Async wrapper providing access to raw HTTP responses."""
class CerebrasWithStreamedResponse:
"""Wrapper providing access to streaming responses."""
class AsyncCerebrasWithStreamedResponse:
"""Async wrapper providing access to streaming responses."""from cerebras.cloud.sdk import Cerebras
# Using environment variable CEREBRAS_API_KEY
client = Cerebras()
# Explicit API key
client = Cerebras(api_key="your-api-key-here")from cerebras.cloud.sdk import Cerebras, Timeout
import httpx
# Custom timeout configuration
timeout = Timeout(connect=5.0, read=30.0, write=10.0, pool=5.0)
# Custom headers and client configuration
client = Cerebras(
api_key="your-api-key",
timeout=timeout,
max_retries=3,
default_headers={"User-Agent": "MyApp/1.0"},
warm_tcp_connection=True
)

import asyncio
from cerebras.cloud.sdk import AsyncCerebras
async def main():
async with AsyncCerebras() as client:
response = await client.chat.completions.create(
model="llama3.1-70b",
messages=[{"role": "user", "content": "Hello!"}]
)
print(response.choices[0].message.content)
asyncio.run(main())

import httpx
from cerebras.cloud.sdk import Cerebras, DefaultHttpxClient
# Using custom httpx client with specific configuration
http_client = httpx.Client(
limits=httpx.Limits(max_keepalive_connections=20, max_connections=100),
timeout=httpx.Timeout(30.0)
)
client = Cerebras(
api_key="your-api-key",
http_client=http_client
)

from cerebras.cloud.sdk import Cerebras
client = Cerebras()
# Access raw HTTP response
raw_response = client.with_raw_response.chat.completions.create(
model="llama3.1-70b",
messages=[{"role": "user", "content": "Hello!"}]
)
print(f"Status: {raw_response.status_code}")
print(f"Headers: {raw_response.headers}")
parsed_response = raw_response.parse()  # Get the ChatCompletion object

All client operations can raise exceptions from the SDK's exception hierarchy. Common patterns:
from cerebras.cloud.sdk import Cerebras, APIError, RateLimitError, AuthenticationError
client = Cerebras()
try:
response = client.chat.completions.create(
model="llama3.1-70b",
messages=[{"role": "user", "content": "Hello!"}]
)
except AuthenticationError:
print("Invalid API key")
except RateLimitError:
print("Rate limit exceeded")
except APIError as e:
print(f"API error: {e}")Install with Tessl CLI
npx tessl i tessl/pypi-cerebras-cloud-sdk