CtrlK
Blog · Docs · Log in · Get started
Tessl Logo

tessl/pypi-cerebras-cloud-sdk

The official Python library for the cerebras API

Pending

Quality

Pending

Does it follow best practices?

Impact

Pending

No eval scenarios have been run

Overview
Eval results
Files

docs/legacy-completions.md

Legacy Completions

Legacy text completion API for traditional completion-style interactions. Supports text generation with various parameters including temperature, top-p sampling, frequency penalties, and custom stop sequences. This API follows the traditional completion format where the model continues from a given prompt.

Capabilities

Text Completion Creation

Creates text completions using the traditional prompt-based format with extensive configuration options for controlling generation behavior.

def create(
    self,
    *,
    model: str,
    # Every optional parameter uses the `Optional[X] | NotGiven` pattern:
    # NOT_GIVEN ("omit from the request") is a NotGiven sentinel, so a
    # plain `Optional[X] = NOT_GIVEN` default would violate the annotation.
    best_of: Optional[int] | NotGiven = NOT_GIVEN,
    echo: Optional[bool] | NotGiven = NOT_GIVEN,
    frequency_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] | NotGiven = NOT_GIVEN,
    logprobs: Optional[int] | NotGiven = NOT_GIVEN,
    max_tokens: Optional[int] | NotGiven = NOT_GIVEN,
    n: Optional[int] | NotGiven = NOT_GIVEN,
    presence_penalty: Optional[float] | NotGiven = NOT_GIVEN,
    prompt: Union[str, List[str], List[int], List[List[int]], None] | NotGiven = NOT_GIVEN,
    seed: Optional[int] | NotGiven = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] | NotGiven = NOT_GIVEN,
    stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    suffix: Optional[str] | NotGiven = NOT_GIVEN,
    temperature: Optional[float] | NotGiven = NOT_GIVEN,
    top_p: Optional[float] | NotGiven = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    grammar_root: Optional[str] | NotGiven = NOT_GIVEN,
    return_raw_tokens: Optional[bool] | NotGiven = NOT_GIVEN,
    **kwargs
) -> Completion:
    """
    Create a text completion.

    Non-streaming overload: with stream omitted or False, the full
    Completion is returned in one response.

    Parameters:
    - model: ID of the model to use (e.g., "llama3.1-70b")
    - best_of: Generate N completions server-side and return the best one
    - echo: Echo back the prompt in addition to the completion
    - frequency_penalty: Penalty for frequent token usage (-2.0 to 2.0)
    - logit_bias: Modify likelihood of specific tokens appearing
    - logprobs: Include log probabilities on most likely tokens (0-5)
    - max_tokens: Maximum number of tokens to generate
    - n: Number of completion choices to generate
    - presence_penalty: Penalty for token presence (-2.0 to 2.0)
    - prompt: Text prompt(s) to complete (string, list of strings, or token arrays)
    - seed: Random seed for deterministic generation
    - stop: Sequences where generation should stop
    - stream: Enable streaming response (use stream=True for streaming)
    - stream_options: Additional streaming options
    - suffix: Text that comes after the completion (for insertion tasks)
    - temperature: Sampling temperature (0.0 to 2.0)
    - top_p: Nucleus sampling parameter (0.0 to 1.0)
    - user: Unique identifier for the end-user
    - grammar_root: Grammar rule for structured output generation
    - return_raw_tokens: Return raw tokens instead of decoded text

    Returns:
    Completion object with generated text
    """

Streaming Text Completion

Creates streaming text completions for real-time token generation.

def create(
    self,
    *,
    model: str,
    prompt: Union[str, List[str], List[int], List[List[int]], None],
    stream: Literal[True],
    **kwargs
) -> Stream[CompletionChunk]:
    """
    Create a streaming text completion.

    Overload of create() selected when stream=True: instead of one
    buffered Completion, it returns an iterable Stream that yields
    CompletionChunk objects as tokens are generated.

    Parameters:
    - stream: Must be True for streaming responses
    - All other parameters same as non-streaming create()

    Returns:
    Stream object yielding CompletionChunk objects
    """

Resource Classes

Synchronous and asynchronous resource classes that provide the completions API methods.

class CompletionsResource(SyncAPIResource):
    """Synchronous completions resource."""
    
    # Alternate view of this resource; the name suggests its methods return
    # the raw HTTP response rather than parsed models — confirm in SDK docs.
    @cached_property
    def with_raw_response(self) -> CompletionsResourceWithRawResponse: ...
    
    # Alternate view whose methods presumably stream the response body
    # instead of buffering it — confirm in SDK docs.
    @cached_property
    def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: ...

class AsyncCompletionsResource(AsyncAPIResource):
    """Asynchronous completions resource."""
    
    # Async counterpart of CompletionsResource.with_raw_response.
    @cached_property
    def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: ...
    
    # Async counterpart of CompletionsResource.with_streaming_response.
    @cached_property
    def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: ...

Parameter Types

Completion Parameters

class CompletionCreateParams(TypedDict, total=False):
    """Parameters for creating text completions.

    total=False makes every key optional except `model`, which is
    explicitly marked Required.
    """
    model: Required[str]
    
    best_of: Optional[int]
    echo: Optional[bool]
    frequency_penalty: Optional[float]
    logit_bias: Optional[Dict[str, int]]
    logprobs: Optional[int]
    max_tokens: Optional[int]
    n: Optional[int]
    presence_penalty: Optional[float]
    # One prompt, a batch of prompts, or pre-tokenized input
    # (a single token array or a batch of token arrays).
    prompt: Union[str, List[str], List[int], List[List[int]], None]
    seed: Optional[int]
    # A single stop string, a list of them, or None.
    stop: Union[Optional[str], List[str], None]
    stream: Optional[bool]
    stream_options: Optional[StreamOptions]
    suffix: Optional[str]
    temperature: Optional[float]
    top_p: Optional[float]
    user: Optional[str]

class StreamOptions(TypedDict, total=False):
    """Options for streaming completions."""
    # When true, the stream presumably includes a final usage chunk —
    # confirm against the API reference.
    include_usage: Optional[bool]

Response Types

Completion Response

class Completion(BaseModel):
    """Complete text completion response."""
    id: str
    # One CompletionChoice per requested completion (the `n` parameter).
    choices: List[CompletionChoice]
    # Unix timestamp of creation (int; epoch seconds by convention).
    created: int
    model: str
    # Discriminator; always the literal "text_completion".
    object: Literal["text_completion"]
    system_fingerprint: Optional[str]
    usage: Optional[CompletionUsage]

class CompletionChoice(BaseModel):
    """Individual completion choice."""
    # Why generation ended; None while a stream is still in progress.
    finish_reason: Optional[Literal["stop", "length", "content_filter"]]
    # Position of this choice within Completion.choices.
    index: int
    # Present only when the request set `logprobs`.
    logprobs: Optional[CompletionLogprobs]
    text: str

class CompletionUsage(BaseModel):
    """Token usage information."""
    completion_tokens: int
    prompt_tokens: int
    # total_tokens = prompt_tokens + completion_tokens (server-reported).
    total_tokens: int

class CompletionLogprobs(BaseModel):
    """Log probability information.

    The parallel lists below are index-aligned: entry i of each list
    describes the i-th generated token.
    """
    # Character offset of each token within the generated text.
    text_offset: List[int]
    # Log probability of each token; entries may be None.
    token_logprobs: List[Optional[float]]
    tokens: List[str]
    # Per position, a mapping of the top alternative tokens to their
    # log probabilities (size bounded by the `logprobs` request param).
    top_logprobs: Optional[List[Dict[str, float]]]

Streaming Response Types

class CompletionChunk(BaseModel):
    """Streaming chunk in text completion.

    Mirrors Completion, but each chunk carries only the incremental
    text produced since the previous chunk.
    """
    id: str
    choices: List[CompletionChunkChoice]
    # Unix timestamp of creation (epoch seconds by convention).
    created: int
    model: str
    # Same discriminator value as the non-streaming response.
    object: Literal["text_completion"]
    system_fingerprint: Optional[str]
    # Typically only populated on the final chunk — confirm with
    # StreamOptions.include_usage semantics.
    usage: Optional[CompletionUsage]

class CompletionChunkChoice(BaseModel):
    """Choice in streaming chunk."""
    # None until the stream for this choice finishes.
    finish_reason: Optional[Literal["stop", "length", "content_filter"]]
    index: int
    logprobs: Optional[CompletionLogprobs]
    # Incremental text for this chunk (may be empty).
    text: str

Usage Examples

Basic Text Completion

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Request a single completion for a plain-text prompt; generation stops
# at the first newline or period.
completion = client.completions.create(
    model="llama3.1-70b",
    prompt="The future of artificial intelligence is",
    max_tokens=100,
    temperature=0.7,
    stop=["\n", "."]
)

first = completion.choices[0]
print(first.text)
print(f"Used {completion.usage.total_tokens} tokens")

Text Completion with Multiple Choices

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Ask for three independent completions of the same prompt.
result = client.completions.create(
    model="llama3.1-70b",
    prompt="Complete this sentence: The most important skill in programming is",
    max_tokens=50,
    n=3,  # Generate 3 different completions
    temperature=0.8
)

for number, option in enumerate(result.choices, start=1):
    print(f"Option {number}: {option.text.strip()}")

Streaming Text Completion

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# stream=True makes create() return an iterable of incremental chunks.
token_stream = client.completions.create(
    model="llama3.1-70b",
    prompt="Write a short poem about machine learning:",
    max_tokens=200,
    stream=True,
    temperature=0.8
)

print("Poem:", end="")
for chunk in token_stream:
    piece = chunk.choices[0].text
    if piece:
        print(piece, end="", flush=True)
print()

Text Completion with Log Probabilities

import math

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Request the top-5 log probabilities alongside each generated token.
response = client.completions.create(
    model="llama3.1-70b",
    prompt="The capital of France is",
    max_tokens=10,
    logprobs=5,  # Return top 5 log probabilities
    temperature=0.1
)

choice = response.choices[0]
print(f"Generated text: {choice.text}")

if choice.logprobs:
    print("\nToken probabilities:")
    for token, logprob in zip(choice.logprobs.tokens, choice.logprobs.token_logprobs):
        if logprob is not None:
            # Convert the log probability to a percentage: 100 * e**logprob.
            # Use math.exp instead of the previous hand-typed 2.71828
            # approximation of e, which introduced avoidable error.
            probability = round(100 * math.exp(logprob), 2)
            print(f"  '{token}': {probability}%")

Text Insertion (with Suffix)

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Insertion task: generate the text that belongs between the prompt
# (a function header) and the suffix (its closing return statement).
fill_in = client.completions.create(
    model="llama3.1-70b",
    prompt="def fibonacci(n):\n    ",
    suffix="\n    return result",
    max_tokens=100,
    temperature=0.3
)

print("Generated code:")
print(fill_in.choices[0].text)

Best-of Sampling

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# best_of samples several candidates server-side, while n controls how
# many of them come back in the response.
best = client.completions.create(
    model="llama3.1-70b",
    prompt="Explain quantum computing in simple terms:",
    max_tokens=150,
    best_of=5,  # Generate 5 completions, return the best one
    n=1,  # Return only the best completion
    temperature=0.8
)

print("Best completion:")
print(best.choices[0].text)

Async Text Completion

import asyncio
from cerebras.cloud.sdk import AsyncCerebras

async def complete_text():
    """Request one completion with the async client and print it."""
    # `async with` guarantees the client is closed even if the request
    # raises. The original called `client.aclose()`, but the SDK client
    # exposes `close()` (a coroutine), not `aclose()` — so that line
    # would raise AttributeError.
    async with AsyncCerebras() as client:
        response = await client.completions.create(
            model="llama3.1-70b",
            prompt="The benefits of renewable energy include",
            max_tokens=100,
            temperature=0.6
        )
        
        print(response.choices[0].text)

asyncio.run(complete_text())

Batch Completions

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# A list prompt yields one completion choice per prompt, in order.
energy_prompts = [
    "The advantages of solar power are",
    "Wind energy is beneficial because",
    "Hydroelectric power works by"
]

batch = client.completions.create(
    model="llama3.1-70b",
    prompt=energy_prompts,  # Multiple prompts
    max_tokens=50,
    temperature=0.5
)

for number, choice in enumerate(batch.choices, start=1):
    print(f"Prompt {number} completion: {choice.text.strip()}")

Frequency and Presence Penalties

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# frequency_penalty discourages tokens proportionally to how often they
# have already appeared; presence_penalty discourages any token that has
# appeared at all.
reply = client.completions.create(
    model="llama3.1-70b",
    prompt="List the planets in our solar system:",
    max_tokens=100,
    frequency_penalty=0.5,  # Reduce repetition
    presence_penalty=0.3,   # Encourage new topics
    temperature=0.7
)

print(reply.choices[0].text)

Stop Sequences

from cerebras.cloud.sdk import Cerebras

client = Cerebras()

# Generation halts before emitting any of the stop sequences, keeping
# the answer to a single Q/A turn.
qa = client.completions.create(
    model="llama3.1-70b",
    prompt="Q: What is photosynthesis?\nA:",
    max_tokens=200,
    stop=["Q:", "\n\n"],  # Stop at next question or double newline
    temperature=0.5
)

print(f"Answer: {qa.choices[0].text.strip()}")

Install with Tessl CLI

npx tessl i tessl/pypi-cerebras-cloud-sdk

docs

chat-completions.md

client-management.md

index.md

legacy-completions.md

models.md

types-and-configuration.md

tile.json