Amazon Redshift connector for Python implementing Python Database API Specification 2.0
npx @tessl/cli install tessl/pypi-redshift-connector@2.1.0

A pure Python connector for Amazon Redshift that implements the Python Database API Specification 2.0. This library provides seamless integration with popular data science libraries like pandas and numpy, while supporting Redshift-specific features including IAM authentication, identity provider (IdP) authentication, and Redshift-specific data types. The library is designed for high-performance database connectivity with comprehensive support for Python versions 3.6 through 3.11, making it suitable for data analytics workflows, ETL processes, business intelligence applications, and any Python application requiring direct access to Amazon Redshift databases with enterprise-grade authentication and data type handling capabilities.
pip install redshift_connector
pip install redshift_connector[full]  # includes pandas and numpy support

import redshift_connector

Common patterns for working with the connector:
from redshift_connector import connect, Connection, Cursor
from redshift_connector import Error, InterfaceError, ProgrammingError

import redshift_connector
# Basic connection with username/password
conn = redshift_connector.connect(
host='examplecluster.abc123xyz789.us-west-1.redshift.amazonaws.com',
database='dev',
user='awsuser',
password='my_password'
)
cursor = conn.cursor()
cursor.execute("CREATE TEMP TABLE book(bookname varchar, author varchar)")
cursor.executemany("INSERT INTO book (bookname, author) VALUES (%s, %s)",
[('One Hundred Years of Solitude', 'Gabriel García Márquez'),
('A Brief History of Time', 'Stephen Hawking')])
cursor.execute("SELECT * FROM book")
result = cursor.fetchall()
print(result)
# Clean up
cursor.close()
conn.close()

The redshift_connector follows the Python Database API Specification 2.0 architecture:
The connect() function creates Connection instances with comprehensive authentication options.

This design enables the connector to serve as a comprehensive database access layer for Python applications requiring enterprise-grade Redshift connectivity.
Essential database connectivity functionality including connection establishment, query execution, result fetching, and transaction management. This forms the foundation of the DB-API 2.0 interface.
def connect(
user: str = None,
database: str = None,
password: str = None,
host: str = None,
port: int = None,
# ... 60+ additional parameters
) -> Connection: ...
class Connection:
def cursor(self) -> Cursor: ...
def commit(self) -> None: ...
def rollback(self) -> None: ...
def close(self) -> None: ...
class Cursor:
def execute(self, operation: str, args=None) -> 'Cursor': ...
def fetchone(self) -> list | None: ...
def fetchmany(self, num: int = None) -> tuple: ...
def fetchall(self) -> tuple: ...

Comprehensive authentication system supporting multiple identity providers, IAM roles, and security protocols. Includes support for SAML, OAuth2, JWT, and browser-based authentication flows.
# IAM Authentication
conn = redshift_connector.connect(
iam=True,
cluster_identifier='my-cluster',
db_user='myuser',
# AWS credentials via profile, keys, or instance roles
)
# Identity Provider Authentication
conn = redshift_connector.connect(
credentials_provider='AdfsCredentialsProvider',
idp_host='example.com',
# Additional IdP-specific parameters
)

Native integration with pandas and numpy for efficient data transfer between Redshift and Python data science workflows. Supports DataFrame I/O and numpy array operations.
class Cursor:
def fetch_dataframe(self, num: int = None) -> 'pandas.DataFrame': ...
def write_dataframe(self, df: 'pandas.DataFrame', table: str) -> None: ...
def fetch_numpy_array(self, num: int = None) -> 'numpy.ndarray': ...

Complete DB-API 2.0 exception hierarchy providing structured error handling for different types of database and interface errors.
class Error(Exception): ...
class InterfaceError(Error): ...
class DatabaseError(Error): ...
class ProgrammingError(DatabaseError): ...
class OperationalError(DatabaseError): ...
# Additional exception classes...

Comprehensive support for PostgreSQL and Redshift data types with Python object mapping, including arrays, JSON, geometric types, and date/time handling.
# DB-API 2.0 Type Constructors
def Date(year: int, month: int, day: int) -> date: ...
def Time(hour: int, minute: int, second: int) -> time: ...
def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime: ...
def Binary(value: bytes) -> bytes: ...
# PostgreSQL Type Classes
class PGJson: ...
class PGJsonb: ...
class PGEnum: ...

Data Types and Type Conversion
Database schema introspection capabilities for retrieving metadata about tables, columns, procedures, and other database objects.
class Cursor:
def get_tables(self, catalog: str = None, schema: str = None, table: str = None, types: list = None) -> tuple: ...
def get_columns(self, catalog: str = None, schema: str = None, table: str = None, column: str = None) -> tuple: ...
def get_primary_keys(self, catalog: str = None, schema: str = None, table: str = None) -> tuple: ...
def get_procedures(self, catalog: str = None, schema: str = None, procedure: str = None) -> tuple: ...

Database Metadata and Introspection
# DB-API 2.0 Constants
apilevel: str = "2.0"
threadsafety: int = 1
paramstyle: str = "format"
# Protocol and Configuration
DEFAULT_PROTOCOL_VERSION: int = 2
class ClientProtocolVersion(IntEnum):
BASE_SERVER = 0
EXTENDED_RESULT_METADATA = 1
BINARY = 2
class DbApiParamstyle(Enum):
QMARK = "qmark"
NUMERIC = "numeric"
NAMED = "named"
FORMAT = "format"
PYFORMAT = "pyformat"