An integration package connecting Google's genai package and LangChain
—
Simple text generation interface providing direct access to Google's Gemini models for completion-style tasks. This interface extends LangChain's BaseLLM and is designed for straightforward text generation without the complexity of conversational context management.
Text generation LLM that wraps Google's Gemini models in a simple completion interface.
class GoogleGenerativeAI:
def __init__(
self,
*,
model: str,
google_api_key: Optional[SecretStr] = None,
credentials: Any = None,
temperature: float = 0.7,
top_p: Optional[float] = None,
top_k: Optional[int] = None,
max_output_tokens: Optional[int] = None,
n: int = 1,
max_retries: int = 6,
timeout: Optional[float] = None,
client_options: Optional[Dict] = None,
transport: Optional[str] = None,
additional_headers: Optional[Dict[str, str]] = None,
response_modalities: Optional[List[Modality]] = None,
thinking_budget: Optional[int] = None,
include_thoughts: Optional[bool] = None,
safety_settings: Optional[Dict[HarmCategory, HarmBlockThreshold]] = None
)

Parameters:
- model (str): Model name (e.g., "gemini-2.5-pro", "gemini-2.0-flash")
- google_api_key (Optional[SecretStr]): Google API key (defaults to GOOGLE_API_KEY env var)
- credentials (Any): Google authentication credentials object
- temperature (float): Generation temperature [0.0, 2.0], controls randomness
- top_p (Optional[float]): Nucleus sampling parameter [0.0, 1.0]
- top_k (Optional[int]): Top-k sampling parameter for vocabulary selection
- max_output_tokens (Optional[int]): Maximum tokens in response
- n (int): Number of completions to generate (default: 1)
- max_retries (int): Maximum retry attempts for failed requests (default: 6)
- timeout (Optional[float]): Request timeout in seconds
- client_options (Optional[Dict]): API client configuration options
- transport (Optional[str]): Transport method ["rest", "grpc", "grpc_asyncio"]
- additional_headers (Optional[Dict[str, str]]): Additional HTTP headers
- response_modalities (Optional[List[Modality]]): Response output modalities
- thinking_budget (Optional[int]): Thinking budget in tokens for reasoning
- include_thoughts (Optional[bool]): Include reasoning thoughts in response
- safety_settings (Optional[Dict[HarmCategory, HarmBlockThreshold]]): Content safety configuration

def invoke(
self,
input: Union[str, List[BaseMessage]],
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any
) -> str

Generate text completion for the given input.

Parameters:
- input: Input text prompt or list of messages
- config: Optional run configuration
- stop: List of stop sequences to end generation
- **kwargs: Additional generation parameters

Returns: Generated text as string
async def ainvoke(
self,
input: Union[str, List[BaseMessage]],
config: Optional[RunnableConfig] = None,
**kwargs: Any
) -> str

Async version of invoke().
def stream(
self,
input: Union[str, List[BaseMessage]],
config: Optional[RunnableConfig] = None,
*,
stop: Optional[List[str]] = None,
**kwargs: Any
) -> Iterator[str]

Stream text generation as chunks.

Parameters:
- input: Input text prompt or list of messages
- config: Optional run configuration
- stop: List of stop sequences
- **kwargs: Additional generation parameters

Returns: Iterator of text chunks
async def astream(
self,
input: Union[str, List[BaseMessage]],
config: Optional[RunnableConfig] = None,
**kwargs: Any
) -> AsyncIterator[str]

Async version of stream().
def get_num_tokens(self, text: str) -> int

Estimate token count for input text.

Parameters:
- text (str): Input text to count tokens for

Returns: Estimated token count
from langchain_google_genai import GoogleGenerativeAI
# Initialize LLM
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
# Generate text completion
result = llm.invoke("Once upon a time in a land of artificial intelligence")
print(result)

# Stream text as it's generated
for chunk in llm.stream("Write a short story about robots learning to paint"):
print(chunk, end="", flush=True)
print() # New line after streaming

# Creative writing with higher temperature
creative_llm = GoogleGenerativeAI(
model="gemini-2.5-pro",
temperature=1.2 # More creative/random
)
creative_text = creative_llm.invoke("Describe a futuristic city")
# Factual content with lower temperature
factual_llm = GoogleGenerativeAI(
model="gemini-2.5-pro",
temperature=0.1 # More focused/deterministic
)
factual_text = factual_llm.invoke("Explain photosynthesis")

# Configure generation parameters
llm = GoogleGenerativeAI(
model="gemini-2.5-pro",
max_output_tokens=500, # Limit response length
top_p=0.8, # Nucleus sampling
top_k=40 # Top-k sampling
)
result = llm.invoke("Write a summary of machine learning")

from langchain_google_genai import HarmCategory, HarmBlockThreshold
# Configure content safety
safe_llm = GoogleGenerativeAI(
model="gemini-2.5-pro",
safety_settings={
HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
}
)
result = safe_llm.invoke("Generate helpful and safe content")

import asyncio
async def generate_multiple():
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
# Generate multiple completions concurrently
tasks = [
llm.ainvoke("Write about space exploration"),
llm.ainvoke("Write about ocean conservation"),
llm.ainvoke("Write about renewable energy")
]
results = await asyncio.gather(*tasks)
for i, result in enumerate(results, 1):
print(f"Result {i}: {result[:100]}...")
# Run async example
asyncio.run(generate_multiple())

# Use stop sequences to control generation
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
result = llm.invoke(
"List the planets in our solar system:\n1.",
stop=["\n\n", "10."] # Stop at double newline or item 10
)
print(result)

# Configure API client options
llm = GoogleGenerativeAI(
model="gemini-2.5-pro",
client_options={
"api_endpoint": "https://generativelanguage.googleapis.com"
},
transport="rest", # Use REST instead of gRPC
additional_headers={
"User-Agent": "MyApp/1.0"
},
timeout=30.0 # 30 second timeout
)
result = llm.invoke("Generate content with custom configuration")

from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
# Create a simple chain
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
prompt = PromptTemplate.from_template(
"Write a {style} poem about {topic}"
)
output_parser = StrOutputParser()
# Build chain
chain = prompt | llm | output_parser
# Use chain
result = chain.invoke({
"style": "haiku",
"topic": "artificial intelligence"
})
print(result)

# Estimate tokens before generation
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
prompt = "Explain quantum computing in detail"
token_count = llm.get_num_tokens(prompt)
print(f"Input tokens: {token_count}")
# Generate with awareness of token usage
if token_count < 1000: # Stay within limits
result = llm.invoke(prompt)
print(f"Generated: {result[:100]}...")
else:
print("Prompt too long, consider shortening")

Handle errors appropriately for LLM operations:
from langchain_google_genai import GoogleGenerativeAI
try:
llm = GoogleGenerativeAI(model="gemini-2.5-pro")
result = llm.invoke("Your prompt here")
print(result)
except Exception as e:
if "safety" in str(e).lower():
print(f"Content blocked by safety filters: {e}")
elif "model" in str(e).lower():
print(f"Model error: {e}")
else:
print(f"Generation error: {e}")

Install with Tessl CLI
npx tessl i tessl/pypi-langchain-google-genai