Apache Flink SQL connector that enables seamless integration with HBase 2.2.x databases through Flink's Table API and SQL interface
—
Comprehensive configuration options for HBase connections, Zookeeper settings, performance tuning, and operational parameters.
Essential settings that must be provided for the connector to function.
WITH (
'connector' = 'hbase-2.2', -- Required: Connector identifier
'table-name' = 'hbase_table_name' -- Required: HBase table name
)

Usage Examples:
-- Minimal table definition: only the two required options
-- ('connector' and 'table-name') are supplied.
-- `value` is a reserved keyword in Flink SQL, so the nested field name
-- must be escaped with backticks or the statement fails to parse.
CREATE TABLE minimal_config (
    rowkey STRING,
    data ROW<`value` STRING>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'my_hbase_table'
);

Settings for establishing connections to the HBase cluster through Zookeeper.
WITH (
'zookeeper.quorum' = 'host1:port1,host2:port2,...', -- Zookeeper ensemble
'zookeeper.znode.parent' = '/hbase' -- Zookeeper root path (default: '/hbase')
)

Parameters:
- zookeeper.quorum: Comma-separated list of Zookeeper servers with optional ports
- zookeeper.znode.parent: Root directory in Zookeeper for HBase cluster metadata

Usage Examples:
-- Single Zookeeper node (development).
-- `value` is a reserved keyword in Flink SQL and is therefore escaped
-- with backticks inside the ROW type.
CREATE TABLE dev_table (
    rowkey STRING,
    data ROW<`value` STRING>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'dev_data',
    'zookeeper.quorum' = 'localhost:2181'
);
-- Production cluster with multiple Zookeeper nodes and a non-default
-- znode parent.
-- `timestamp` is a reserved keyword in Flink SQL and is therefore escaped
-- with backticks inside the ROW type.
CREATE TABLE prod_table (
    rowkey STRING,
    info ROW<name STRING, `timestamp` BIGINT>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'production_data',
    'zookeeper.quorum' = 'zk1.example.com:2181,zk2.example.com:2181,zk3.example.com:2181',
    'zookeeper.znode.parent' = '/hbase-prod'
);

Options for controlling how data is processed and represented.
WITH (
'null-string-literal' = 'null' -- Null value representation (default: 'null')
)

Parameters:
- null-string-literal: String representation used for null values in string fields

Usage Examples:
-- Custom null representation: string fields that are null are
-- represented as 'N/A' instead of the default 'null'.
CREATE TABLE custom_nulls (
    rowkey STRING,
    data ROW<
        optional_field STRING,
        required_field STRING
    >,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'nullable_data',
    'zookeeper.quorum' = 'localhost:2181',
    'null-string-literal' = 'N/A'
);

Settings for controlling write operations, buffering behavior, and performance tuning.
WITH (
'sink.buffer-flush.max-size' = '2mb', -- Buffer size threshold (default: 2MB)
'sink.buffer-flush.max-rows' = '1000', -- Buffer row count threshold (default: 1000)
'sink.buffer-flush.interval' = '1s', -- Time-based flush interval (default: 1s)
'sink.parallelism' = '1' -- Sink operator parallelism
)

Parameters:
- sink.buffer-flush.max-size: Maximum memory size for buffered mutations before flushing
- sink.buffer-flush.max-rows: Maximum number of rows to buffer before flushing
- sink.buffer-flush.interval: Maximum time to wait before flushing buffered operations
- sink.parallelism: Number of parallel sink operators (affects write throughput)

Usage Examples:
-- High-throughput sink configuration: larger buffers, a longer flush
-- interval and four parallel sink operators.
-- `value` and `timestamp` are reserved keywords in Flink SQL and are
-- therefore escaped with backticks inside the ROW type.
CREATE TABLE high_volume_sink (
    rowkey STRING,
    metrics ROW<`value` DOUBLE, `timestamp` BIGINT, source STRING>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'metrics_data',
    'zookeeper.quorum' = 'localhost:2181',
    'sink.buffer-flush.max-size' = '10mb',
    'sink.buffer-flush.max-rows' = '5000',
    'sink.buffer-flush.interval' = '5s',
    'sink.parallelism' = '4'
);
-- Low-latency sink configuration: all three flush thresholds are set
-- well below their defaults so buffered writes are flushed sooner.
CREATE TABLE low_latency_sink (
    rowkey STRING,
    events ROW<
        event_type STRING,
        payload STRING
    >,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'event_stream',
    'zookeeper.quorum' = 'localhost:2181',
    'sink.buffer-flush.max-size' = '100kb',
    'sink.buffer-flush.max-rows' = '50',
    'sink.buffer-flush.interval' = '100ms'
);

Settings for temporal table joins, caching behavior, and retry logic.
WITH (
'lookup.async' = 'false', -- Enable async lookup (default: false)
'lookup.cache.max-rows' = '-1', -- Cache size limit (default: -1, disabled)
'lookup.cache.ttl' = '0', -- Cache time-to-live (default: 0, no expiration)
'lookup.max-retries' = '3' -- Maximum retry attempts (default: 3)
)

Parameters:
- lookup.async: Enable asynchronous lookup operations for better throughput
- lookup.cache.max-rows: Maximum number of lookup results to cache (-1 disables caching)
- lookup.cache.ttl: Maximum time-to-live for each cached row, after which the entry is expired (default: 0; the lookup cache is disabled by default)
- lookup.max-retries: Number of retry attempts for failed lookup operations

Usage Examples:
-- High-performance lookup table: asynchronous lookups with a bounded,
-- time-limited cache and extra retries.
CREATE TABLE cached_lookup (
    rowkey STRING,
    user_data ROW<
        name STRING,
        email STRING,
        preferences STRING
    >,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'user_profiles',
    'zookeeper.quorum' = 'localhost:2181',
    'lookup.async' = 'true',
    'lookup.cache.max-rows' = '10000',
    'lookup.cache.ttl' = '300s', -- 5 minute cache
    'lookup.max-retries' = '5'
);
-- Simple synchronous lookup: async is switched off explicitly and no
-- cache options are set.
CREATE TABLE sync_lookup (
    rowkey STRING,
    reference_data ROW<
        description STRING,
        category STRING
    >,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'reference_table',
    'zookeeper.quorum' = 'localhost:2181',
    'lookup.async' = 'false'
);

Pass-through mechanism for additional HBase client configuration properties using the properties prefix.
WITH (
'properties.*' = 'value' -- HBase configuration pass-through
)

Parameters:
- properties.*: Any HBase configuration property can be passed by prefixing its key with "properties."

Usage Examples:
-- Kerberos authentication configuration, passed through to the HBase
-- client via the 'properties.' prefix.
-- `value` and `timestamp` are reserved keywords in Flink SQL and are
-- therefore escaped with backticks inside the ROW type.
CREATE TABLE secure_hbase (
    rowkey STRING,
    data ROW<`value` STRING, `timestamp` BIGINT>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'secure_data',
    'zookeeper.quorum' = 'localhost:2181',
    'properties.hbase.security.authentication' = 'kerberos',
    'properties.hbase.security.authorization' = 'true',
    'properties.hbase.kerberos.regionserver.principal' = 'hbase/_HOST@REALM.COM'
);
-- Performance tuning: HBase client timeouts supplied through the
-- 'properties.' pass-through. All values are in milliseconds.
-- NOTE(review): 'hbase.regionserver.lease.period' looks like the legacy
-- name of the scanner timeout — confirm it is still honored by the
-- HBase 2.2 client before relying on it.
CREATE TABLE tuned_hbase (
    rowkey STRING,
    metrics ROW<
        cpu DOUBLE,
        memory BIGINT
    >,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'performance_data',
    'zookeeper.quorum' = 'localhost:2181',
    'properties.hbase.client.scanner.timeout.period' = '120000', -- 2 minutes
    'properties.hbase.rpc.timeout' = '60000', -- 1 minute
    'properties.hbase.regionserver.lease.period' = '120000', -- 2 minutes
    'properties.hbase.client.operation.timeout' = '90000' -- 90 seconds
);
-- Custom limits on concurrent HBase client tasks (total, per server and
-- per region), supplied through the 'properties.' pass-through.
CREATE TABLE pooled_hbase (
    rowkey STRING,
    data ROW<payload STRING>,
    PRIMARY KEY (rowkey) NOT ENFORCED
) WITH (
    'connector' = 'hbase-2.2',
    'table-name' = 'pooled_data',
    'zookeeper.quorum' = 'localhost:2181',
    'properties.hbase.client.max.total.tasks' = '100',
    'properties.hbase.client.max.perserver.tasks' = '10',
    'properties.hbase.client.max.perregion.tasks' = '1'
);

Comprehensive example showing all configuration options together.
-- Comprehensive example combining required, connection, data-handling,
-- sink, lookup and pass-through options in a single table definition.
-- `timestamp` is a reserved keyword in Flink SQL, so the field of that
-- name in transaction_data is escaped with backticks.
CREATE TABLE comprehensive_config (
    transaction_id STRING,
    transaction_data ROW<
        amount DECIMAL(15,2),
        currency STRING,
        `timestamp` TIMESTAMP(3),
        description STRING
    >,
    customer_info ROW<
        customer_id STRING,
        account_type STRING
    >,
    metadata ROW<
        processing_time TIMESTAMP(3),
        source_system STRING,
        batch_id STRING
    >,
    PRIMARY KEY (transaction_id) NOT ENFORCED
) WITH (
    -- Required settings
    'connector' = 'hbase-2.2',
    'table-name' = 'financial_transactions',
    -- Connection settings
    'zookeeper.quorum' = 'zk1.bank.com:2181,zk2.bank.com:2181,zk3.bank.com:2181',
    'zookeeper.znode.parent' = '/hbase-production',
    -- Data handling
    'null-string-literal' = 'NULL',
    -- Sink performance tuning
    'sink.buffer-flush.max-size' = '16mb',
    'sink.buffer-flush.max-rows' = '2000',
    'sink.buffer-flush.interval' = '3s',
    'sink.parallelism' = '8',
    -- Lookup optimization
    'lookup.async' = 'true',
    'lookup.cache.max-rows' = '50000',
    'lookup.cache.ttl' = '600s', -- 10 minute cache
    'lookup.max-retries' = '5',
    -- Advanced HBase configuration
    'properties.hbase.client.scanner.timeout.period' = '300000', -- 5 minutes
    'properties.hbase.rpc.timeout' = '120000', -- 2 minutes
    'properties.hbase.security.authentication' = 'kerberos' -- Enable Kerberos
);

The connector performs validation of configuration options at table creation time:
Connection Validation:
Schema Validation:
Performance Validation:
Install with Tessl CLI
npx tessl i tessl/maven-org-apache-flink--flink-sql-connector-hbase-2-2-2-11