Agentic RAG with document processing, embeddings, and vector search.
from pathlib import Path
from typing import Any, Dict, List, Optional, Union

from agno.knowledge import Knowledge
from agno.knowledge.document import Document
from agno.vectordb import VectorDb
from agno.knowledge.embedder import Embedder
from agno.knowledge.reader import Reader
class Knowledge:
    def __init__(
        self,
        vector_db: VectorDb,
        *,
        embedder: Optional[Embedder] = None,
        reader: Optional[Reader] = None,
        num_documents: int = 5,
        num_on_rerank: Optional[int] = None,
        optimize_on: Optional[int] = None,
        chunking_strategy: Optional[Any] = None,
        **kwargs
    ): ...

    def add_contents(
        self,
        documents: List[Union[str, Path, Document]],
        upsert: bool = False,
        skip_existing: bool = True
    ) -> None:
        """Add documents to the knowledge base."""

    def search(
        self,
        query: str,
        num_documents: Optional[int] = None,
        filters: Optional[Union[Dict, List]] = None
    ) -> List[Document]:
"""Search for relevant documents."""# PostgreSQL with pgvector

# PostgreSQL with pgvector
from agno.vectordb.pgvector import PgVector

class PgVector:
    def __init__(
        self,
        table_name: str,
        db_url: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        schema: str = "ai",
        **kwargs
    ): ...

# Pinecone
from agno.vectordb.pineconedb import PineconeDb

class PineconeDb:
    def __init__(
        self,
        name: str,
        api_key: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        dimension: int = 1536,
        **kwargs
    ): ...

# Qdrant
from agno.vectordb.qdrant import Qdrant

class Qdrant:
    def __init__(
        self,
        collection: str,
        url: Optional[str] = None,
        api_key: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        **kwargs
    ): ...

# ChromaDB
from agno.vectordb.chroma import ChromaDb

class ChromaDb:
    def __init__(
        self,
        collection: str,
        path: Optional[str] = None,
        embedder: Optional[Embedder] = None,
        **kwargs
    ): ...

# Weaviate
from agno.vectordb.weaviate import Weaviate
# Milvus
from agno.vectordb.milvus import Milvus
# LanceDB
from agno.vectordb.lancedb import LanceDb
# And 10+ more...

# OpenAI
from agno.knowledge.embedder import OpenAIEmbedder

class OpenAIEmbedder:
    def __init__(
        self,
        id: str = "text-embedding-3-small",
        api_key: Optional[str] = None,
        dimensions: Optional[int] = None,
        **kwargs
    ): ...

# Cohere
from agno.knowledge.embedder import CohereEmbedder

class CohereEmbedder:
    def __init__(
        self,
        id: str = "embed-english-v3.0",
        api_key: Optional[str] = None,
        **kwargs
    ): ...

# HuggingFace
from agno.knowledge.embedder import SentenceTransformerEmbedder

class SentenceTransformerEmbedder:
    def __init__(
        self,
        id: str = "all-MiniLM-L6-v2",
        **kwargs
    ): ...

# Ollama (local)
from agno.knowledge.embedder import OllamaEmbedder
# Also: Gemini, Mistral, VoyageAI, Jina, FastEmbed, AWS Bedrock, etc.
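
# Sketch: embedders can also be called directly to produce vectors. The
# get_embedding() method is an assumption about the shared embedder
# interface; it is not listed in the signatures above.
embedder = SentenceTransformerEmbedder(id="all-MiniLM-L6-v2")
vector = embedder.get_embedding("hello world")  # expected: List[float]
print(len(vector))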

from agno.knowledge.reader import (
    PDFReader,
    DocxReader,
    PPTXReader,
    CSVReader,
    TextReader,
    MarkdownReader,
    WebsiteReader,
    YouTubeReader,
    ArxivReader,
    WikipediaReader
)

# PDF Reader
class PDFReader:
    def __init__(self, **kwargs): ...
    def read(self, path: Path) -> List[Document]: ...

# Website Reader
class WebsiteReader:
    def __init__(
        self,
        max_depth: int = 3,
        max_links: int = 10,
        **kwargs
    ): ...

# YouTube Reader
class YouTubeReader:
    def __init__(self, **kwargs): ...
    def read(self, url: str) -> List[Document]: ...
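
# Sketch: readers turn raw sources into Document chunks that can be passed to
# Knowledge.add_contents(). The file path and video URL are placeholders.
pdf_docs = PDFReader().read(Path("report.pdf"))
video_docs = YouTubeReader().read("https://www.youtube.com/watch?v=VIDEO_ID")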

from agno.knowledge.document import Document

class Document:
    def __init__(
        self,
        content: str,
        id: Optional[str] = None,
        name: Optional[str] = None,
        meta_data: Optional[Dict[str, Any]] = None,
        embedder: Optional[Embedder] = None,
        **kwargs
    ): ...

    @property
    def embedding(self) -> List[float]:
"""Get or generate document embedding."""from agno.agent import Agent

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge import Knowledge
from agno.vectordb.pgvector import PgVector
from agno.knowledge.embedder import OpenAIEmbedder
from agno.knowledge.reader import PDFReader
# Create knowledge base
knowledge = Knowledge(
    vector_db=PgVector(
        table_name="documents",
        db_url="postgresql://localhost:5432/aidb"
    ),
    embedder=OpenAIEmbedder(),
    reader=PDFReader(),
    num_documents=5
)
# Add documents
knowledge.add_contents([
    "document1.pdf",
    "document2.pdf",
    "document3.pdf"
])
# Create agent with RAG
agent = Agent(
    model=OpenAIChat(id="gpt-4"),
    knowledge=knowledge,
    add_knowledge_to_context=True,
    instructions=["Use the knowledge base to answer questions"]
)
response = agent.run("What does the document say about feature X?")
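# The answer is on the returned response object; `.content` is an assumption
# about the response type in recent Agno releases.
print(response.content)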

from agno.knowledge import Knowledge
from agno.vectordb.chroma import ChromaDb
from agno.knowledge.reader import (
    PDFReader,
    DocxReader,
    WebsiteReader
)

knowledge = Knowledge(
    vector_db=ChromaDb(collection="my_docs"),
    num_documents=10
)

# Add various document types (file types auto-detected)
knowledge.add_contents(["report.pdf", "notes.docx"])
# Add websites
website_reader = WebsiteReader(max_depth=2)
knowledge.add_contents(
    ["https://docs.example.com"],
    reader=website_reader
)

from agno.agent import Agent
from agno.models.openai import OpenAIChat
from agno.knowledge import Knowledge
from agno.vectordb.pineconedb import PineconeDb

knowledge = Knowledge(
    vector_db=PineconeDb(
        name="docs",
        api_key="your-key"
    ),
    num_documents=5
)
# Agent can dynamically choose filters
agent = Agent(
    model=OpenAIChat(id="gpt-4"),
    knowledge=knowledge,
    add_knowledge_to_context=True,
    enable_agentic_knowledge_filters=True,  # Agent chooses filters
    instructions=["Search relevant documents to answer questions"]
)
response = agent.run("Find information about Python in the 2023 documents")
# Agent automatically applies year filter
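
# Sketch: filters can also be applied explicitly through search(); the
# metadata key/value below assumes documents were tagged with a `year` field.
results = knowledge.search(
    "Python updates",
    num_documents=5,
    filters={"year": 2023},
)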

from agno.knowledge import Knowledge
from agno.vectordb.lancedb import LanceDb
from agno.knowledge.embedder import SentenceTransformerEmbedder
# Use local embeddings (no API costs)
knowledge = Knowledge(
    vector_db=LanceDb(
        table_name="local_docs",
        uri="./lancedb"
    ),
    embedder=SentenceTransformerEmbedder(
        id="all-MiniLM-L6-v2"
    )
)
knowledge.add_contents(["docs/"])from agno.knowledge import Knowledge

from agno.knowledge import Knowledge
from agno.vectordb.qdrant import Qdrant

knowledge = Knowledge(
    vector_db=Qdrant(
        collection="docs",
        url="http://localhost:6333"
    ),
    num_documents=20,  # Get 20 candidates
    num_on_rerank=5,   # Rerank to top 5
)
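
# Sketch: with the settings above, a query first retrieves 20 candidates and
# then reranks them down to the top 5; the query text is a placeholder.
results = knowledge.search("deployment checklist")
print(len(results))  # expected: 5 after reranking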