An integration package connecting Google's genai package and LangChain.
Managed semantic search and document retrieval using Google's vector store infrastructure. Provides corpus and document management, similarity search, and integration with Google's AQA (Attributed Question Answering) service.
Primary vector store interface that extends LangChain's VectorStore to provide managed storage and retrieval using Google's semantic retriever service.
class GoogleVectorStore:
def __init__(
self,
*,
corpus_id: str,
document_id: Optional[str] = None
)

Parameters:
corpus_id (str): Google corpus identifier for the vector store
document_id (Optional[str]): Specific document within the corpus (optional)

def add_texts(
self,
texts: Iterable[str],
metadatas: Optional[List[Dict[str, Any]]] = None,
*,
document_id: Optional[str] = None,
**kwargs: Any
) -> List[str]
Add texts to the vector store as searchable chunks.
Parameters:
texts (Iterable[str]): Texts to add to the store
metadatas (Optional[List[Dict]]): Metadata for each text chunk
document_id (Optional[str]): Target document ID (required if store not initialized with document_id)
**kwargs: Additional parameters
Returns: List of chunk IDs for the added texts
def similarity_search(
self,
query: str,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any
) -> List[Document]
Perform semantic search to find similar documents.
Parameters:
query (str): Search query text
k (int): Number of results to return (default: 4)
filter (Optional[Dict]): Metadata filters for search
**kwargs: Additional search parameters
Returns: List of Document objects with relevant content
def similarity_search_with_score(
self,
query: str,
k: int = 4,
filter: Optional[Dict[str, Any]] = None,
**kwargs: Any
) -> List[Tuple[Document, float]]
Perform similarity search with relevance scores.
Parameters:
query (str): Search query text
k (int): Number of results to return (default: 4)
filter (Optional[Dict]): Metadata filters for search
**kwargs: Additional search parameters
Returns: List of tuples containing (Document, relevance_score)
@property
def name(self) -> str
Returns the full name/path of the Google entity.
@property
def corpus_id(self) -> str
Returns the corpus ID managed by this vector store.
@property
def document_id(self) -> Optional[str]
Returns the document ID managed by this vector store (if any).
def delete(
self,
ids: Optional[List[str]] = None,
**kwargs: Any
) -> Optional[bool]
Delete documents or chunks from the vector store.
Parameters:
ids (Optional[List[str]]): Specific chunk IDs to delete
**kwargs: Additional parameters
Returns: Success status
async def adelete(
self,
ids: Optional[List[str]] = None,
**kwargs: Any
) -> Optional[bool]
Async version of delete().
@classmethod
def create_corpus(
cls,
corpus_id: Optional[str] = None,
display_name: Optional[str] = None
) -> "GoogleVectorStore"
Create a new corpus on Google's servers.
Parameters:
corpus_id (Optional[str]): Desired corpus ID (auto-generated if None)
display_name (Optional[str]): Human-readable name for the corpus
Returns: GoogleVectorStore instance for the new corpus
@classmethod
def create_document(
cls,
corpus_id: str,
document_id: Optional[str] = None,
display_name: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None
) -> "GoogleVectorStore"
Create a new document within an existing corpus.
Parameters:
corpus_id (str): Target corpus ID
document_id (Optional[str]): Desired document ID (auto-generated if None)
display_name (Optional[str]): Human-readable name for the document
metadata (Optional[Dict]): Custom metadata for the document
Returns: GoogleVectorStore instance for the new document
@classmethod
def from_texts(
cls,
texts: List[str],
embedding: Optional[Embeddings] = None,
metadatas: Optional[List[Dict]] = None,
*,
corpus_id: Optional[str] = None,
document_id: Optional[str] = None,
**kwargs: Any
) -> "GoogleVectorStore"
Create vector store and populate with texts in one operation.
Parameters:
texts (List[str]): Initial texts to add
embedding (Optional[Embeddings]): Embedding model (uses server-side if None)
metadatas (Optional[List[Dict]]): Metadata for each text
corpus_id (Optional[str]): Target corpus (created if it doesn't exist)
document_id (Optional[str]): Target document (created if it doesn't exist)
**kwargs: Additional parameters
Returns: GoogleVectorStore instance with populated content
def as_aqa(self, **kwargs: Any) -> Runnable[str, AqaOutput]
Create a runnable that performs attributed question answering using the vector store content.
Parameters:
**kwargs: Additional AQA configuration parameters
Returns: Runnable that takes a query string and returns AqaOutput with attributed answers
def as_retriever(self, **kwargs: Any) -> VectorStoreRetriever
Convert to a LangChain retriever for use in chains.
Parameters:
**kwargs: Retriever configuration parameters
Returns: VectorStoreRetriever instance
from langchain_google_genai import GoogleVectorStore
# Create a new corpus
vector_store = GoogleVectorStore.create_corpus(
corpus_id="my-ai-knowledge-base",
display_name="AI Knowledge Base"
)
print(f"Created corpus: {vector_store.corpus_id}")
# Add documents to the corpus
texts = [
"Machine learning is a subset of artificial intelligence.",
"Deep learning uses neural networks with multiple layers.",
"Natural language processing focuses on understanding text.",
"Computer vision enables machines to interpret images."
]
# Add texts (will create a document automatically)
chunk_ids = vector_store.add_texts(texts)
print(f"Added {len(chunk_ids)} chunks")

# Create a document within a corpus
doc_store = GoogleVectorStore.create_document(
corpus_id="my-ai-knowledge-base",
document_id="ml-basics",
display_name="Machine Learning Basics",
metadata={"topic": "machine-learning", "level": "beginner"}
)
# Add content to the specific document
ml_texts = [
"Supervised learning uses labeled data for training.",
"Unsupervised learning finds patterns in unlabeled data.",
"Reinforcement learning learns through trial and error."
]
doc_store.add_texts(ml_texts)

# Connect to existing corpus
vector_store = GoogleVectorStore(corpus_id="my-ai-knowledge-base")
# Perform similarity search
query = "What is deep learning?"
results = vector_store.similarity_search(query, k=3)
for i, doc in enumerate(results, 1):
print(f"Result {i}: {doc.page_content}")
print(f"Metadata: {doc.metadata}")
print()

# Get similarity scores with results
results_with_scores = vector_store.similarity_search_with_score(
"Explain neural networks",
k=5
)
for doc, score in results_with_scores:
print(f"Score: {score:.3f} - {doc.page_content}")

from langchain_core.documents import Document
# Create vector store from texts in one step
documents = [
"Python is a versatile programming language.",
"JavaScript is essential for web development.",
"SQL is used for database operations.",
"Docker helps with application containerization."
]
metadata = [
{"category": "programming", "language": "python"},
{"category": "programming", "language": "javascript"},
{"category": "database", "language": "sql"},
{"category": "devops", "tool": "docker"}
]
# Create and populate vector store
vector_store = GoogleVectorStore.from_texts(
texts=documents,
metadatas=metadata,
corpus_id="programming-knowledge",
document_id="languages-and-tools"
)
# Search with metadata filtering
results = vector_store.similarity_search(
"What programming languages are available?",
filter={"category": "programming"}
)

from langchain_google_genai import AqaInput
# Create AQA runnable from vector store
aqa = vector_store.as_aqa()
# Perform attributed question answering
query = "What are the main types of machine learning?"
aqa_result = aqa.invoke(query)
print(f"Answer: {aqa_result.answer}")
print(f"Confidence: {aqa_result.answerable_probability:.2f}")
print("Sources used:")
for passage in aqa_result.attributed_passages:
print(f"- {passage}")

from langchain_core.prompts import PromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI
# Convert to retriever
retriever = vector_store.as_retriever(search_kwargs={"k": 3})
# Use in a RAG chain
llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro")
template = """Based on the following context, answer the question:
Context:
{context}
Question: {question}
Answer:"""
prompt = PromptTemplate.from_template(template)
# Create RAG chain
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
def format_docs(docs):
return "\n\n".join(doc.page_content for doc in docs)
rag_chain = (
{"context": retriever | format_docs, "question": RunnablePassthrough()}
| prompt
| llm
| StrOutputParser()
)
# Ask questions with retrieval
answer = rag_chain.invoke("What is the difference between supervised and unsupervised learning?")
print(answer)

# Delete specific chunks
vector_store = GoogleVectorStore(corpus_id="my-corpus")
# Add texts first
chunk_ids = vector_store.add_texts([
"Text to be deleted later",
"Important text to keep"
])
# Delete specific chunk
success = vector_store.delete(ids=[chunk_ids[0]])
print(f"Deletion successful: {success}")

import asyncio
async def manage_vector_store():
vector_store = GoogleVectorStore(corpus_id="async-corpus")
# Async deletion
success = await vector_store.adelete(ids=["chunk-id-1", "chunk-id-2"])
print(f"Async deletion: {success}")
asyncio.run(manage_vector_store())

from langchain_google_genai import DoesNotExistsException
try:
# Try to connect to non-existent corpus
vector_store = GoogleVectorStore(corpus_id="non-existent-corpus")
except DoesNotExistsException as e:
print(f"Vector store error: {e}")
# Create the corpus instead
vector_store = GoogleVectorStore.create_corpus(
corpus_id="new-corpus",
display_name="Newly Created Corpus"
)

class ServerSideEmbedding:
def embed_documents(self, texts: List[str]) -> List[List[float]]
def embed_query(self, text: str) -> List[float]
Placeholder embedding class for server-side embeddings (returns empty vectors as Google handles embedding internally).
class DoesNotExistsException(Exception):
def __init__(self, *, corpus_id: str, document_id: Optional[str] = None)
Exception raised when trying to access a corpus or document that doesn't exist on Google's servers.
Parameters:
corpus_id (str): The corpus ID that doesn't exist
document_id (Optional[str]): The document ID that doesn't exist (if applicable)

Install with Tessl CLI
npx tessl i tessl/pypi-langchain-google-genai