Amazon Redshift connector for Python implementing the Python Database API Specification 2.0 (PEP 249).

Comprehensive support for PostgreSQL and Redshift data types with Python object mapping, including arrays, JSON, geometric types, and date/time handling. The redshift_connector package provides seamless conversion between database types and Python objects for efficient data processing.

Standard DB-API 2.0 constructor functions for creating database-compatible objects from Python values.
def Date(year: int, month: int, day: int) -> date:
    """
    Construct an object holding a date value.

    Parameters:
    - year: Year value
    - month: Month value (1-12)
    - day: Day value (1-31)

    Returns:
    datetime.date object

    Raises ValueError if any component is out of range (delegated to
    datetime.date). This function is part of the DB-API 2.0 specification.
    """
    return date(year, month, day)
def Time(hour: int, minute: int, second: int) -> time:
    """
    Construct an object holding a time value.

    Parameters:
    - hour: Hour value (0-23)
    - minute: Minute value (0-59)
    - second: Second value (0-59)

    Returns:
    datetime.time object

    Raises ValueError if any component is out of range (delegated to
    datetime.time). This function is part of the DB-API 2.0 specification.
    """
    return time(hour, minute, second)
def Timestamp(year: int, month: int, day: int, hour: int, minute: int, second: int) -> datetime:
    """
    Construct an object holding a timestamp value.

    Parameters:
    - year: Year value
    - month: Month value (1-12)
    - day: Day value (1-31)
    - hour: Hour value (0-23)
    - minute: Minute value (0-59)
    - second: Second value (0-59)

    Returns:
    datetime.datetime object (naive, no timezone)

    Raises ValueError if any component is out of range (delegated to
    datetime.datetime). This function is part of the DB-API 2.0 specification.
    """
    return datetime(year, month, day, hour, minute, second)
def DateFromTicks(ticks: float) -> date:
    """
    Construct an object holding a date value from the given ticks value
    (number of seconds since the epoch).

    The conversion uses local time, matching the PEP 249 reference
    implementation (Date(*time.localtime(ticks)[:3])).

    Parameters:
    - ticks: Seconds since Unix epoch

    Returns:
    datetime.date object

    This function is part of the DB-API 2.0 specification.
    """
    # Local import keeps the stdlib time module out of the package namespace,
    # where it would clash with datetime.time.
    from time import localtime
    parts = localtime(ticks)
    return date(parts.tm_year, parts.tm_mon, parts.tm_mday)
def TimeFromTicks(ticks: float) -> time:
    """
    Construct an object holding a time value from the given ticks value
    (number of seconds since the epoch).

    The conversion uses local time, matching the PEP 249 reference
    implementation (Time(*time.localtime(ticks)[3:6])); fractional seconds
    are discarded.

    Parameters:
    - ticks: Seconds since Unix epoch

    Returns:
    datetime.time object

    This function is part of the DB-API 2.0 specification.
    """
    # Local import avoids shadowing datetime.time at module level.
    from time import localtime
    parts = localtime(ticks)
    return time(parts.tm_hour, parts.tm_min, parts.tm_sec)
def TimestampFromTicks(ticks: float) -> datetime:
    """
    Construct an object holding a timestamp value from the given ticks value
    (number of seconds since the epoch).

    The conversion uses local time via datetime.fromtimestamp; fractional
    seconds are preserved in the microsecond field.

    Parameters:
    - ticks: Seconds since Unix epoch

    Returns:
    datetime.datetime object (naive, local time)

    This function is part of the DB-API 2.0 specification.
    """
    return datetime.fromtimestamp(ticks)
def Binary(value: bytes) -> bytes:
    """
    Construct an object holding binary data.

    Parameters:
    - value: Binary data as bytes (or any bytes-like object, e.g. bytearray)

    Returns:
    bytes object

    This function is part of the DB-API 2.0 specification.
    """
    # bytes() both accepts bytes-like input and guarantees an immutable result.
    return bytes(value)

# Specialized type classes for PostgreSQL and Redshift specific data types
# that don't have direct Python equivalents.
class PGType:
    """
    Base wrapper for PostgreSQL-specific values.

    Stores the raw Python value and provides the default wire serialization
    (the value's str() form, encoded as bytes).
    """

    def __init__(self, value) -> None:
        """Remember the wrapped value."""
        self.value = value

    def encode(self, encoding: str) -> bytes:
        """Serialize the wrapped value as text in the given encoding."""
        as_text = str(self.value)
        return as_text.encode(encoding)
class PGEnum(PGType):
    """
    PostgreSQL enum type wrapper.

    Accepts either a plain string or a Python enum member; for enum members
    the underlying ``.value`` is stored instead of the member itself.
    """

    def __init__(self, value) -> None:
        """
        Initialize with enum value.

        Parameters:
        - value: String or enum object
        """
        # Strings pass through untouched; anything else is assumed to carry
        # its payload on a .value attribute (enum.Enum members do).
        self.value = value if isinstance(value, str) else value.value
class PGJson(PGType):
    """
    PostgreSQL JSON type wrapper.

    On encode, the wrapped Python object is serialized to a JSON string
    rather than using the plain str() form from PGType.
    """

    def encode(self, encoding: str) -> bytes:
        """Encode the value as JSON using specified encoding."""
        from json import dumps
        serialized = dumps(self.value)
        return serialized.encode(encoding)
class PGJsonb(PGType):
    """
    PostgreSQL JSONB type wrapper.

    Identical serialization to PGJson — the JSONB distinction only matters
    server-side, where the value is stored in binary JSON form.
    """

    def encode(self, encoding: str) -> bytes:
        """Encode the value as JSON using specified encoding."""
        from json import dumps
        serialized = dumps(self.value)
        return serialized.encode(encoding)
class PGTsvector(PGType):
    """
    PostgreSQL text search vector type wrapper.
    Used for full-text search functionality.

    Inherits value storage and text encoding behavior unchanged from PGType.
    """
class PGVarchar(str):
    """
    PostgreSQL VARCHAR type wrapper.
    Inherits from str but provides type identification for the driver.

    Instances behave exactly like ordinary strings; only the subclass
    identity distinguishes them.
    """
class PGText(str):
    """
    PostgreSQL TEXT type wrapper.

    Behaves exactly like ``str``; the subclass exists solely so the driver
    can identify the intended database type.
    """

# Object identifier constants for all supported PostgreSQL and Redshift data types.
class RedshiftOID(IntEnum):
    """Enum containing all PostgreSQL/Redshift type OID constants.

    Values are PostgreSQL wire-protocol object identifiers. Members declared
    with the same value (e.g. FLOAT/DOUBLE_PRECISION, NUMERIC/DECIMAL) are
    IntEnum aliases of the first-declared member.
    """
    # Integer Types
    BIGINT = 20
    INTEGER = 23
    SMALLINT = 21
    # Floating Point Types
    FLOAT = 701
    REAL = 700
    DOUBLE_PRECISION = 701  # alias of FLOAT (same OID)
    # Numeric Types
    NUMERIC = 1700
    DECIMAL = 1700  # alias of NUMERIC (same OID)
    # Boolean Type
    BOOLEAN = 16
    # Character Types
    VARCHAR = 1043
    CHAR = 1042
    TEXT = 25
    BPCHAR = 1042  # alias of CHAR (same OID)
    # Binary Types
    BYTES = 17
    BYTEA = 17  # alias of BYTES (same OID)
    # Date/Time Types
    DATE = 1082
    TIME = 1083
    TIMETZ = 1266
    TIMESTAMP = 1114
    TIMESTAMPTZ = 1184
    INTERVAL = 1186
    # JSON Types
    JSON = 114
    JSONB = 3802
    # Array Types (examples)
    BIGINT_ARRAY = 1016
    INTEGER_ARRAY = 1007
    VARCHAR_ARRAY = 1015
    BOOLEAN_ARRAY = 1000
    # Geometric Types
    GEOMETRY = 3000
    GEOGRAPHY = 3001
    # Network Types
    INET = 869
    CIDR = 650
    # UUID Type
    UUID = 2950
    # Other Types
    OID = 26
    NULL = 0
# All OID constants are also available at module level
BIGINT = RedshiftOID.BIGINT.value
INTEGER = RedshiftOID.INTEGER.value
VARCHAR = RedshiftOID.VARCHAR.value
# ... etc.
# Configuration options for controlling how database types are converted to Python objects.
# Numeric conversion configuration
conn = redshift_connector.connect(
    # ... connection parameters
    numeric_to_float=False  # Use Decimal objects for NUMERIC/DECIMAL (default)
    # numeric_to_float=True  # Convert NUMERIC/DECIMAL to Python float
)

# Protocol version affects type conversion behavior
conn = redshift_connector.connect(
    # ... connection parameters
    client_protocol_version=2  # BINARY protocol (default, most efficient)
    # client_protocol_version=1  # EXTENDED_RESULT_METADATA protocol
    # client_protocol_version=0  # BASE_SERVER protocol (text-based)
)

# Comprehensive support for PostgreSQL array types with automatic conversion
# to/from Python lists.
# Array type usage examples

# Insert array data: Python lists map directly to PostgreSQL arrays.
cursor.execute(
    "INSERT INTO array_table (int_array, text_array) VALUES (%s, %s)",
    ([1, 2, 3, 4, 5], ['apple', 'banana', 'cherry'])
)

# Retrieve array data: array columns come back as Python lists.
cursor.execute("SELECT int_array, text_array FROM array_table")
row = cursor.fetchone()
int_array = row[0]   # Returns Python list: [1, 2, 3, 4, 5]
text_array = row[1]  # Returns Python list: ['apple', 'banana', 'cherry']

# Multi-dimensional arrays: nested lists map to multi-dimensional arrays.
cursor.execute(
    "INSERT INTO matrix_table (matrix) VALUES (%s)",
    ([[1, 2], [3, 4]])
)

# Array utility functions (from utils.array_util)
from redshift_connector.utils import (
    array_check_dimensions,
    array_dim_lengths,
    array_find_first_element,
    array_flatten,
    array_has_null
)

# Validate array structure before insertion (all sub-arrays must have
# consistent lengths for a well-formed multi-dimensional array).
data = [[1, 2, 3], [4, 5, 6]]
if array_check_dimensions(data):
    cursor.execute("INSERT INTO table (matrix_col) VALUES (%s)", (data,))

# Native support for JSON data types with automatic serialization/deserialization.
import redshift_connector
from redshift_connector import PGJson, PGJsonb

cursor = conn.cursor()

# Insert JSON data
json_data = {
    "name": "John Doe",
    "age": 30,
    "addresses": [
        {"type": "home", "city": "New York"},
        {"type": "work", "city": "Boston"}
    ]
}

# Using PGJson wrapper (explicit JSON serialization)
cursor.execute(
    "INSERT INTO user_data (id, profile) VALUES (%s, %s)",
    (1, PGJson(json_data))
)

# Using PGJsonb wrapper for binary JSON
cursor.execute(
    "INSERT INTO user_data (id, profile) VALUES (%s, %s)",
    (2, PGJsonb(json_data))
)

# Direct insertion (automatically handled)
cursor.execute(
    "INSERT INTO user_data (id, profile) VALUES (%s, %s)",
    (3, json_data)  # Automatically converted to JSON
)

# Retrieve JSON data
cursor.execute("SELECT profile FROM user_data WHERE id = %s", (1,))
profile = cursor.fetchone()[0]  # Returns parsed Python dict/list
print(profile['name'])  # Access JSON data directly
# Comprehensive date and time type support with timezone handling and interval types.
from datetime import date, time, datetime, timedelta
import redshift_connector

cursor = conn.cursor()

# Date types: datetime.date maps to the DATE column type.
today = date.today()
cursor.execute("INSERT INTO events (event_date) VALUES (%s)", (today,))

# Time types: datetime.time maps to the TIME column type.
event_time = time(14, 30, 0)  # 2:30 PM
cursor.execute("INSERT INTO events (event_time) VALUES (%s)", (event_time,))

# Timestamp types: datetime.datetime maps to the TIMESTAMP column type.
event_timestamp = datetime(2023, 12, 25, 14, 30, 0)
cursor.execute("INSERT INTO events (event_timestamp) VALUES (%s)", (event_timestamp,))

# Using DB-API constructors (equivalent to the datetime constructors above)
from redshift_connector import Date, Time, Timestamp

new_date = Date(2023, 12, 25)
new_time = Time(14, 30, 0)
new_timestamp = Timestamp(2023, 12, 25, 14, 30, 0)
cursor.execute(
    "INSERT INTO events (event_date, event_time, event_timestamp) VALUES (%s, %s, %s)",
    (new_date, new_time, new_timestamp)
)

# Interval types (Redshift specific)
cursor.execute("SELECT event_timestamp + INTERVAL '1 day' FROM events")
future_dates = cursor.fetchall()

# Support for binary data types including BYTEA and geometry types.
from redshift_connector import Binary

# Insert binary data
binary_data = b'\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR...'  # PNG image data
cursor.execute(
    "INSERT INTO images (filename, data) VALUES (%s, %s)",
    ('logo.png', Binary(binary_data))
)

# Retrieve binary data
cursor.execute("SELECT data FROM images WHERE filename = %s", ('logo.png',))
image_data = cursor.fetchone()[0]  # Returns bytes object

# Geometry data (Redshift spatial types)
cursor.execute("SELECT ST_AsText(geom) FROM spatial_table")
geometry_text = cursor.fetchone()[0]  # WKT format string
# Low-level utilities for custom type conversion and handling edge cases.
# Type conversion utility functions (from utils.type_utils)
from redshift_connector.utils import (
    numeric_in,         # Parse numeric text input
    numeric_in_binary,  # Parse numeric binary input
    date_in,            # Parse date text input
    time_in,            # Parse time text input
    text_recv,          # Receive text data
    array_recv_text,    # Receive array text data
    array_recv_binary   # Receive array binary data
)

# Type mapping constants
from redshift_connector.utils import py_types, redshift_types

# Python type to Redshift type mapping
python_type = int
redshift_oid = py_types.get(python_type)  # Get corresponding Redshift OID

# Redshift type information
type_info = redshift_types.get(RedshiftOID.INTEGER)  # Get type conversion info
# Patterns for handling custom types and extending type conversion capabilities.
import redshift_connector
from decimal import Decimal
from uuid import UUID

# Custom type handling examples
cursor = conn.cursor()

# UUID handling: pass as a string; the driver has no native UUID binding here.
user_id = UUID('12345678-1234-5678-1234-567812345678')
cursor.execute("INSERT INTO users (id, name) VALUES (%s, %s)", (str(user_id), 'John'))

# Decimal precision handling: Decimal preserves exact precision for NUMERIC columns.
price = Decimal('99.99')
cursor.execute("INSERT INTO products (price) VALUES (%s)", (price,))

# Custom enum handling: PGEnum unwraps Python enum members to their values.
from enum import Enum
from redshift_connector import PGEnum

class Status(Enum):
    ACTIVE = 'active'
    INACTIVE = 'inactive'
    PENDING = 'pending'

status = Status.ACTIVE
cursor.execute("INSERT INTO records (status) VALUES (%s)", (PGEnum(status),))

# Large object handling: the with-block guarantees the file is closed.
with open('large_file.txt', 'rb') as f:
    large_data = f.read()
cursor.execute("INSERT INTO documents (content) VALUES (%s)", (Binary(large_data),))

# Install with Tessl CLI
npx tessl i tessl/pypi-redshift-connector