
# Partition Management and Utilities

Advanced partition handling utilities for efficient data access, partition pruning, and metadata management in partitioned Hive tables. These utilities provide comprehensive support for partition discovery, serialization, and manipulation in both batch and streaming scenarios.
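
For orientation, a partition spec is a set of partition-column/value pairs that, in the conventional Hive layout, corresponds to a directory suffix under the table location. A minimal sketch of that mapping (illustrative values only, not tied to any real warehouse):

```java
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.stream.Collectors;

// A partition spec such as {year=2024, month=01} conventionally maps to a
// directory suffix under the table location, e.g. ".../events/year=2024/month=01".
Map<String, String> spec = new LinkedHashMap<>();
spec.put("year", "2024");
spec.put("month", "01");

String pathSuffix = spec.entrySet().stream()
        .map(e -> e.getKey() + "=" + e.getValue())
        .collect(Collectors.joining("/"));

System.out.println(pathSuffix); // year=2024/month=01
```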

## Capabilities

### HiveTablePartition

Core data structure representing a Hive table partition, or the whole table for non-partitioned tables.

```java { .api }
/**
 * Represents a Hive table partition or whole table
 * Encapsulates storage descriptor and partition specification
 * Used for partition-aware data processing and metadata operations
 */
@PublicEvolving
public class HiveTablePartition implements Serializable {

    /**
     * Create partition representation for non-partitioned table
     * @param storageDescriptor Hive storage descriptor with location and format info
     * @param tableParameters Table-level parameters and properties
     * @return HiveTablePartition representing the whole table
     */
    public static HiveTablePartition ofTable(StorageDescriptor storageDescriptor,
                                             Map<String, String> tableParameters);

    /**
     * Create partition representation for partitioned table
     * @param storageDescriptor Partition storage descriptor with location and format info
     * @param partitionSpec Partition key-value specification (e.g., {year=2024, month=01})
     * @param tableParameters Table-level parameters and properties
     * @return HiveTablePartition representing the specific partition
     */
    public static HiveTablePartition ofPartition(StorageDescriptor storageDescriptor,
                                                 Map<String, String> partitionSpec,
                                                 Map<String, String> tableParameters);

    /**
     * Get storage descriptor containing location, input/output formats, and SerDe info
     * @return StorageDescriptor with partition storage details
     */
    public StorageDescriptor getStorageDescriptor();

    /**
     * Get partition specification as key-value pairs
     * @return Map of partition keys to values, empty for non-partitioned tables
     */
    public Map<String, String> getPartitionSpec();

    /**
     * Get table-level parameters and properties
     * @return Map of table parameters
     */
    public Map<String, String> getTableParameters();

    /**
     * Check if this represents a partitioned table entry
     * @return true if the partition has a non-empty partition specification
     */
    public boolean isPartitioned();

    /**
     * Get partition location path
     * @return String path to the partition data location
     */
    public String getLocation();

    /**
     * Get input format class name
     * @return Input format class for reading partition data
     */
    public String getInputFormat();

    /**
     * Get output format class name
     * @return Output format class for writing partition data
     */
    public String getOutputFormat();

    /**
     * Get SerDe (Serializer/Deserializer) class name
     * @return SerDe class for data serialization
     */
    public String getSerDe();
}
```

**Usage Examples:**

```java
import org.apache.flink.connectors.hive.HiveTablePartition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

import java.util.Map;

// Create partition for a partitioned table
StorageDescriptor partitionSd = new StorageDescriptor();
partitionSd.setLocation("hdfs://namenode:9000/warehouse/events/year=2024/month=01");
partitionSd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
partitionSd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");

Map<String, String> partitionSpec = Map.of(
    "year", "2024",
    "month", "01"
);

Map<String, String> tableParams = Map.of(
    "table.type", "EXTERNAL_TABLE",
    "transient_lastDdlTime", "1640995200"
);

HiveTablePartition partition = HiveTablePartition.ofPartition(
    partitionSd,
    partitionSpec,
    tableParams
);

System.out.println("Partition location: " + partition.getLocation());
System.out.println("Partition spec: " + partition.getPartitionSpec());
System.out.println("Is partitioned: " + partition.isPartitioned()); // true

// Create a representation for a non-partitioned table
StorageDescriptor tableSd = new StorageDescriptor();
tableSd.setLocation("hdfs://namenode:9000/warehouse/users");
tableSd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");

HiveTablePartition wholeTable = HiveTablePartition.ofTable(tableSd, tableParams);
System.out.println("Table location: " + wholeTable.getLocation());
System.out.println("Is partitioned: " + wholeTable.isPartitioned()); // false
```
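
Because `HiveTablePartition` implements `Serializable`, a single instance can also be round-tripped with plain Java serialization, for example when shipping partition metadata to downstream tasks without going through `HivePartitionUtils`. A minimal sketch, reusing the `partition` object from the example above:

```java
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;

// Write the partition to a byte array with standard Java serialization
ByteArrayOutputStream bos = new ByteArrayOutputStream();
try (ObjectOutputStream oos = new ObjectOutputStream(bos)) {
    oos.writeObject(partition);
}

// Read it back and verify the partition spec survived the round trip
try (ObjectInputStream ois = new ObjectInputStream(new ByteArrayInputStream(bos.toByteArray()))) {
    HiveTablePartition restored = (HiveTablePartition) ois.readObject();
    System.out.println("Restored spec: " + restored.getPartitionSpec());
}
```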

### HivePartitionUtils

Utility class providing comprehensive partition management operations including discovery, serialization, and metadata handling.

```java { .api }
/**
 * Utility class for Hive partition operations
 * Provides methods for partition discovery, serialization, and metadata management
 */
public class HivePartitionUtils {

    /**
     * Get all partitions for a table, including metadata and storage descriptors
     * @param jobConf Hadoop JobConf with Hive and HDFS configuration
     * @param hiveVersion Hive version for compatibility
     * @param tablePath Table path (database.table)
     * @param partitionColNames List of partition column names for validation
     * @param remainingPartitions Partition specifications to restrict the result to;
     *                            null fetches all partitions
     * @return List of all table partitions with complete metadata
     */
    public static List<HiveTablePartition> getAllPartitions(JobConf jobConf,
                                                            String hiveVersion,
                                                            ObjectPath tablePath,
                                                            List<String> partitionColNames,
                                                            List<Map<String, String>> remainingPartitions);

    /**
     * Serialize a list of HiveTablePartition objects to a list of byte arrays
     * Used for efficient partition metadata transfer and caching
     * @param partitions List of partitions to serialize
     * @return List of serialized byte arrays, one per partition
     */
    public static List<byte[]> serializeHiveTablePartition(List<HiveTablePartition> partitions);

    /**
     * Deserialize a list of byte arrays back to a list of HiveTablePartition objects
     * @param partitionBytes List of serialized partition data
     * @return List of deserialized partitions
     */
    public static List<HiveTablePartition> deserializeHiveTablePartition(List<byte[]> partitionBytes);
}
```

**Usage Examples:**

```java
import org.apache.flink.connectors.hive.HiveTablePartition;
import org.apache.flink.connectors.hive.util.HivePartitionUtils;
import org.apache.flink.table.catalog.ObjectPath;
import org.apache.hadoop.mapred.JobConf;

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

// Configure the Hadoop JobConf
JobConf jobConf = new JobConf();
jobConf.set("hive.metastore.uris", "thrift://metastore:9083");
jobConf.set("fs.defaultFS", "hdfs://namenode:9000");

// Get all partitions for a table
ObjectPath tablePath = new ObjectPath("sales", "transactions");
List<String> partitionColumns = Arrays.asList("year", "month", "day");

List<HiveTablePartition> allPartitions = HivePartitionUtils.getAllPartitions(
    jobConf,
    "2.3.9", // hiveVersion
    tablePath,
    partitionColumns,
    null // remainingPartitions: null fetches all partitions
);

System.out.println("Found " + allPartitions.size() + " partitions");

// Filter partitions for a specific date range (Q1 2024)
List<HiveTablePartition> filteredPartitions = allPartitions.stream()
    .filter(p -> {
        Map<String, String> spec = p.getPartitionSpec();
        return "2024".equals(spec.get("year")) &&
               Integer.parseInt(spec.get("month")) >= 1 &&
               Integer.parseInt(spec.get("month")) <= 3;
    })
    .collect(Collectors.toList());

System.out.println("Q1 2024 partitions: " + filteredPartitions.size());

// Serialize partitions for caching
List<byte[]> serializedPartitions = HivePartitionUtils.serializeHiveTablePartition(filteredPartitions);
System.out.println("Serialized " + serializedPartitions.size() + " partitions");

// Deserialize partitions
List<HiveTablePartition> deserializedPartitions = HivePartitionUtils.deserializeHiveTablePartition(
    serializedPartitions
);

System.out.println("Deserialized " + deserializedPartitions.size() + " partitions");
```
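
When the partitions of interest are known up front, the `remainingPartitions` argument can push that selection into the metastore lookup instead of filtering the full list afterwards, as the stream filter above does. A sketch, assuming the `jobConf`, `tablePath`, and `partitionColumns` from the example above (the partition specs are illustrative):

```java
import java.util.Arrays;
import java.util.List;
import java.util.Map;

// Fetch only two specific partitions instead of listing everything
List<Map<String, String>> remaining = Arrays.asList(
    Map.of("year", "2024", "month", "01", "day", "15"),
    Map.of("year", "2024", "month", "01", "day", "16")
);

List<HiveTablePartition> selected = HivePartitionUtils.getAllPartitions(
    jobConf, "2.3.9", tablePath, partitionColumns, remaining);

System.out.println("Selected " + selected.size() + " partitions");
```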
