"""
LLM framework to build customizable, production-ready LLM applications with pipelines connecting models, vector DBs, and data processors.

Reading comprehension components for extractive question answering using FARM, Transformers, and specialized table readers.
"""
from typing import Any, Dict, List, Optional

from haystack.nodes.reader import FARMReader, TableReader, TransformersReader
from haystack.nodes.reader.base import BaseReader
from haystack.schema import Answer, Document
class BaseReader:
    """Abstract interface for extractive question-answering readers.

    Concrete readers (FARM-, Transformers-, or table-based) implement
    ``predict`` to extract answer spans from candidate documents.
    """

    def predict(self, query: str, documents: List[Document], top_k: Optional[int] = None) -> List[Answer]:
        """
        Extract answers from documents for the given query.

        Args:
            query: Question text.
            documents: List of documents to search for answers.
            top_k: Maximum number of answers to return.

        Returns:
            List of Answer objects with extracted text and confidence scores.

        Raises:
            NotImplementedError: Always; subclasses must override this method.
        """
        # The base class only declares the contract; a bare stub that
        # silently returned None would mask a missing override in a subclass.
        raise NotImplementedError("Subclasses must implement predict()")
class FARMReader(BaseReader):
    """Extractive QA reader built on the FARM framework."""

    def __init__(self, model_name_or_path: str = "deepset/roberta-base-squad2",
                 use_gpu: bool = True, no_ans_boost: float = 0.0,
                 return_no_answer: bool = False, top_k: int = 10,
                 max_seq_len: int = 256, doc_stride: int = 128):
        """
        Initialize FARM-based QA reader.

        Args:
            model_name_or_path: HuggingFace model name or local path.
            use_gpu: Whether to use GPU acceleration.
            no_ans_boost: Boost for "no answer" predictions.
            return_no_answer: Whether to return "no answer" predictions.
            top_k: Number of answers to return per document.
            max_seq_len: Maximum sequence length for input.
            doc_stride: Stride for sliding window over long documents.
        """
        # Record the configuration; the original stub discarded its arguments.
        self.model_name_or_path = model_name_or_path
        self.use_gpu = use_gpu
        self.no_ans_boost = no_ans_boost
        self.return_no_answer = return_no_answer
        self.top_k = top_k
        self.max_seq_len = max_seq_len
        self.doc_stride = doc_stride
class TransformersReader(BaseReader):
    """Extractive QA reader built on the HuggingFace Transformers pipeline."""

    def __init__(self, model_name_or_path: str = "deepset/roberta-base-squad2",
                 tokenizer: Optional[str] = None, use_gpu: bool = True,
                 top_k: int = 10, max_seq_len: int = 256, doc_stride: int = 128):
        """
        Initialize Transformers-based QA reader.

        Args:
            model_name_or_path: HuggingFace model name or local path.
            tokenizer: Tokenizer name (defaults to model tokenizer).
            use_gpu: Whether to use GPU acceleration.
            top_k: Number of answers to return per document.
            max_seq_len: Maximum sequence length for input.
            doc_stride: Stride for sliding window over long documents.
        """
        # Record the configuration; the original stub discarded its arguments.
        self.model_name_or_path = model_name_or_path
        self.tokenizer = tokenizer
        self.use_gpu = use_gpu
        self.top_k = top_k
        self.max_seq_len = max_seq_len
        self.doc_stride = doc_stride
class TableReader(BaseReader):
    """QA reader that answers questions over structured (tabular) documents."""

    def __init__(self, model_name_or_path: str = "google/tapas-base-finetuned-wtq",
                 use_gpu: bool = True, top_k: int = 10):
        """
        Initialize table-based QA reader for structured data.

        Args:
            model_name_or_path: TAPAS model name or local path.
            use_gpu: Whether to use GPU acceleration.
            top_k: Number of answers to return.
        """
        # Record the configuration; the original stub discarded its arguments.
        self.model_name_or_path = model_name_or_path
        self.use_gpu = use_gpu
        self.top_k = top_k
# Install with the Tessl CLI: npx tessl i tessl/pypi-farm-haystack