The official Python library for the Cerebras API
—
Quality
Pending
Does it follow best practices?
Impact
Pending
No eval scenarios have been run
Legacy text completion API for traditional completion-style interactions. Supports text generation with various parameters including temperature, top-p sampling, frequency penalties, and custom stop sequences. This API follows the traditional completion format where the model continues from a given prompt.
Creates text completions using the traditional prompt-based format with extensive configuration options for controlling generation behavior.
def create(
    self,
    *,
    model: str,
    best_of: Optional[int] = NOT_GIVEN,
    echo: Optional[bool] = NOT_GIVEN,
    frequency_penalty: Optional[float] = NOT_GIVEN,
    logit_bias: Optional[Dict[str, int]] = NOT_GIVEN,
    logprobs: Optional[int] = NOT_GIVEN,
    max_tokens: Optional[int] = NOT_GIVEN,
    n: Optional[int] = NOT_GIVEN,
    presence_penalty: Optional[float] = NOT_GIVEN,
    prompt: Union[str, List[str], List[int], List[List[int]], None] = NOT_GIVEN,
    seed: Optional[int] = NOT_GIVEN,
    stop: Union[Optional[str], List[str], None] = NOT_GIVEN,
    stream: Optional[Literal[False]] | NotGiven = NOT_GIVEN,
    stream_options: Optional[completion_create_params.StreamOptions] | NotGiven = NOT_GIVEN,
    suffix: Optional[str] = NOT_GIVEN,
    temperature: Optional[float] = NOT_GIVEN,
    top_p: Optional[float] = NOT_GIVEN,
    user: str | NotGiven = NOT_GIVEN,
    grammar_root: Optional[str] = NOT_GIVEN,
    return_raw_tokens: Optional[bool] = NOT_GIVEN,
    **kwargs,
) -> Completion:
    """Request a single (non-streaming) text completion.

    Args:
        model: ID of the model to use (e.g., "llama3.1-70b").
        best_of: Sample this many completions server-side, return the best.
        echo: Include the prompt itself ahead of the completion text.
        frequency_penalty: Repetition penalty by token frequency (-2.0 to 2.0).
        logit_bias: Per-token adjustments to sampling likelihood.
        logprobs: Number of top log probabilities to include (0-5).
        max_tokens: Upper bound on the number of generated tokens.
        n: How many completion choices to return.
        presence_penalty: Repetition penalty by token presence (-2.0 to 2.0).
        prompt: Text to complete - a string, list of strings, or token array(s).
        seed: Seed for deterministic sampling.
        stop: Sequence(s) at which generation halts.
        stream: Leave unset/False here; use the streaming overload otherwise.
        stream_options: Extra options that only apply when streaming.
        suffix: Text expected to follow the completion (insertion tasks).
        temperature: Sampling temperature (0.0 to 2.0).
        top_p: Nucleus-sampling cutoff (0.0 to 1.0).
        user: Stable identifier for the end user.
        grammar_root: Root grammar rule for structured output generation.
        return_raw_tokens: Return raw tokens instead of decoded text.

    Returns:
        Completion holding the generated text.
    """
    ...
# Creates streaming text completions for real-time token generation.
def create(
    self,
    *,
    model: str,
    prompt: Union[str, List[str], List[int], List[List[int]], None],
    stream: Literal[True],
    **kwargs,
) -> Stream[CompletionChunk]:
    """Stream a text completion token-by-token.

    Accepts the same parameters as the non-streaming overload, except
    that ``stream`` must be True here.

    Returns:
        A Stream that yields CompletionChunk objects as they arrive.
    """
    ...
# Synchronous and asynchronous resource classes that provide the completions API methods.
class CompletionsResource(SyncAPIResource):
"""Synchronous completions resource."""
@cached_property
def with_raw_response(self) -> CompletionsResourceWithRawResponse: ...
@cached_property
def with_streaming_response(self) -> CompletionsResourceWithStreamingResponse: ...
class AsyncCompletionsResource(AsyncAPIResource):
"""Asynchronous completions resource."""
@cached_property
def with_raw_response(self) -> AsyncCompletionsResourceWithRawResponse: ...
@cached_property
def with_streaming_response(self) -> AsyncCompletionsResourceWithStreamingResponse: ...class CompletionCreateParams(TypedDict, total=False):
"""Parameters for creating text completions."""
model: Required[str]
best_of: Optional[int]
echo: Optional[bool]
frequency_penalty: Optional[float]
logit_bias: Optional[Dict[str, int]]
logprobs: Optional[int]
max_tokens: Optional[int]
n: Optional[int]
presence_penalty: Optional[float]
prompt: Union[str, List[str], List[int], List[List[int]], None]
seed: Optional[int]
stop: Union[Optional[str], List[str], None]
stream: Optional[bool]
stream_options: Optional[StreamOptions]
suffix: Optional[str]
temperature: Optional[float]
top_p: Optional[float]
user: Optional[str]
class StreamOptions(TypedDict, total=False):
"""Options for streaming completions."""
include_usage: Optional[bool]class Completion(BaseModel):
"""Complete text completion response."""
id: str
choices: List[CompletionChoice]
created: int
model: str
object: Literal["text_completion"]
system_fingerprint: Optional[str]
usage: Optional[CompletionUsage]
class CompletionChoice(BaseModel):
"""Individual completion choice."""
finish_reason: Optional[Literal["stop", "length", "content_filter"]]
index: int
logprobs: Optional[CompletionLogprobs]
text: str
class CompletionUsage(BaseModel):
"""Token usage information."""
completion_tokens: int
prompt_tokens: int
total_tokens: int
class CompletionLogprobs(BaseModel):
"""Log probability information."""
text_offset: List[int]
token_logprobs: List[Optional[float]]
tokens: List[str]
top_logprobs: Optional[List[Dict[str, float]]]class CompletionChunk(BaseModel):
"""Streaming chunk in text completion."""
id: str
choices: List[CompletionChunkChoice]
created: int
model: str
object: Literal["text_completion"]
system_fingerprint: Optional[str]
usage: Optional[CompletionUsage]
class CompletionChunkChoice(BaseModel):
"""Choice in streaming chunk."""
finish_reason: Optional[Literal["stop", "length", "content_filter"]]
index: int
logprobs: Optional[CompletionLogprobs]
text: strfrom cerebras.cloud.sdk import Cerebras
# Basic prompt completion: request up to 100 tokens and report token usage.
client = Cerebras()

stop_sequences = ["\n", "."]
response = client.completions.create(
    model="llama3.1-70b",
    prompt="The future of artificial intelligence is",
    max_tokens=100,
    temperature=0.7,
    stop=stop_sequences,
)

print(response.choices[0].text)
print(f"Used {response.usage.total_tokens} tokens")
from cerebras.cloud.sdk import Cerebras
# Ask for several alternative completions of the same prompt in one call.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="Complete this sentence: The most important skill in programming is",
    max_tokens=50,
    temperature=0.8,
    n=3,  # three independent completions
)

for i, choice in enumerate(response.choices):
    print(f"Option {i+1}: {choice.text.strip()}")
from cerebras.cloud.sdk import Cerebras
# Stream a completion, echoing tokens to stdout as they arrive.
client = Cerebras()

stream = client.completions.create(
    model="llama3.1-70b",
    prompt="Write a short poem about machine learning:",
    max_tokens=200,
    temperature=0.8,
    stream=True,
)

print("Poem:", end="")
for chunk in stream:
    piece = chunk.choices[0].text
    if piece:
        print(piece, end="", flush=True)
print()
from cerebras.cloud.sdk import Cerebras
import math

# Low-temperature completion that inspects per-token log probabilities.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="The capital of France is",
    max_tokens=10,
    logprobs=5,  # Return top 5 log probabilities
    temperature=0.1
)

choice = response.choices[0]
print(f"Generated text: {choice.text}")

if choice.logprobs:
    print("\nToken probabilities:")
    # tokens and token_logprobs are parallel lists; entries can be None.
    for token, logprob in zip(choice.logprobs.tokens, choice.logprobs.token_logprobs):
        if logprob is not None:
            # math.exp gives the exact e**x; the previous hand-typed 2.71828
            # constant silently skewed every reported probability.
            probability = round(100 * math.exp(logprob), 2)
            print(f"  '{token}': {probability}%")
from cerebras.cloud.sdk import Cerebras
# Insertion-style completion: the model fills the gap between prompt and suffix.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="def fibonacci(n):\n ",
    suffix="\n return result",
    max_tokens=100,
    temperature=0.3,
)

print("Generated code:")
print(response.choices[0].text)
from cerebras.cloud.sdk import Cerebras
# Server-side reranking: sample five candidates, keep only the best one.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="Explain quantum computing in simple terms:",
    max_tokens=150,
    temperature=0.8,
    best_of=5,  # sample five candidates server-side
    n=1,        # but return only one
)

print("Best completion:")
print(response.choices[0].text)
import asyncio
import asyncio

from cerebras.cloud.sdk import AsyncCerebras


async def complete_text():
    """Issue one completion request against the async client, then clean up."""
    client = AsyncCerebras()
    response = await client.completions.create(
        model="llama3.1-70b",
        prompt="The benefits of renewable energy include",
        max_tokens=100,
        temperature=0.6,
    )
    print(response.choices[0].text)
    # NOTE(review): aclose() is assumed to be the async cleanup hook —
    # confirm against the SDK (some async clients expose close() instead).
    await client.aclose()


asyncio.run(complete_text())
from cerebras.cloud.sdk import Cerebras
# Fan a batch of related prompts through a single completions call.
client = Cerebras()

prompts = [
    "The advantages of solar power are",
    "Wind energy is beneficial because",
    "Hydroelectric power works by",
]

response = client.completions.create(
    model="llama3.1-70b",
    prompt=prompts,  # a list of prompts in one request
    max_tokens=50,
    temperature=0.5,
)

# Choices come back in the same order as the prompts list.
for i, choice in enumerate(response.choices):
    print(f"Prompt {i+1} completion: {choice.text.strip()}")
from cerebras.cloud.sdk import Cerebras
# Nudge the sampler away from repetition with both penalty knobs.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="List the planets in our solar system:",
    max_tokens=100,
    temperature=0.7,
    frequency_penalty=0.5,  # penalize tokens the more often they recur
    presence_penalty=0.3,   # penalize tokens that have appeared at all
)

print(response.choices[0].text)
from cerebras.cloud.sdk import Cerebras
# Q&A-style prompt with custom stop sequences so the answer ends cleanly.
client = Cerebras()

response = client.completions.create(
    model="llama3.1-70b",
    prompt="Q: What is photosynthesis?\nA:",
    max_tokens=200,
    temperature=0.5,
    stop=["Q:", "\n\n"],  # halt before the next question or a blank line
)

print(f"Answer: {response.choices[0].text.strip()}")
# Install with Tessl CLI
npx tessl i tessl/pypi-cerebras-cloud-sdk