# Partition Management and Utilities

Advanced partition handling utilities for efficient data access, partition pruning, and metadata management in partitioned Hive tables. These utilities provide comprehensive support for partition discovery, serialization, and manipulation in both batch and streaming scenarios.

## Capabilities

### HiveTablePartition

Core data structure representing a Hive table partition, or the whole table for non-partitioned tables.

```java { .api }
/**
 * Represents a Hive table partition or a whole table.
 * Encapsulates the storage descriptor and partition specification.
 * Used for partition-aware data processing and metadata operations.
 */
@PublicEvolving
public class HiveTablePartition implements Serializable {

    /**
     * Create a partition representation for a non-partitioned table.
     * @param storageDescriptor Hive storage descriptor with location and format info
     * @param tableParameters table-level parameters and properties
     * @return HiveTablePartition representing the whole table
     */
    public static HiveTablePartition ofTable(StorageDescriptor storageDescriptor,
                                             Map<String, String> tableParameters);

    /**
     * Create a partition representation for a partitioned table.
     * @param storageDescriptor partition storage descriptor with location and format info
     * @param partitionSpec partition key-value specification (e.g., {year=2024, month=01})
     * @param tableParameters table-level parameters and properties
     * @return HiveTablePartition representing the specific partition
     */
    public static HiveTablePartition ofPartition(StorageDescriptor storageDescriptor,
                                                 Map<String, String> partitionSpec,
                                                 Map<String, String> tableParameters);

    /**
     * Get the storage descriptor containing location, input/output formats, and SerDe info.
     * @return StorageDescriptor with partition storage details
     */
    public StorageDescriptor getStorageDescriptor();

    /**
     * Get the partition specification as key-value pairs.
     * @return map of partition keys to values; empty for non-partitioned tables
     */
    public Map<String, String> getPartitionSpec();

    /**
     * Get table-level parameters and properties.
     * @return map of table parameters
     */
    public Map<String, String> getTableParameters();

    /**
     * Check whether this represents a partitioned table entry.
     * @return true if the partition has a non-empty partition specification
     */
    public boolean isPartitioned();

    /**
     * Get the partition location path.
     * @return path to the partition data location
     */
    public String getLocation();

    /**
     * Get the input format class name.
     * @return input format class for reading partition data
     */
    public String getInputFormat();

    /**
     * Get the output format class name.
     * @return output format class for writing partition data
     */
    public String getOutputFormat();

    /**
     * Get the SerDe (Serializer/Deserializer) class name.
     * @return SerDe class for data serialization
     */
    public String getSerDe();
}
```

**Usage Examples:**

```java
import org.apache.flink.connectors.hive.HiveTablePartition;
import org.apache.hadoop.hive.metastore.api.StorageDescriptor;

// Create a partition for a partitioned table
StorageDescriptor partitionSd = new StorageDescriptor();
partitionSd.setLocation("hdfs://namenode:9000/warehouse/events/year=2024/month=01");
partitionSd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");
partitionSd.setOutputFormat("org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat");

Map<String, String> partitionSpec = Map.of(
    "year", "2024",
    "month", "01"
);

Map<String, String> tableParams = Map.of(
    "table.type", "EXTERNAL_TABLE",
    "transient_lastDdlTime", "1640995200"
);

HiveTablePartition partition = HiveTablePartition.ofPartition(
    partitionSd,
    partitionSpec,
    tableParams
);

System.out.println("Partition location: " + partition.getLocation());
System.out.println("Partition spec: " + partition.getPartitionSpec());
System.out.println("Is partitioned: " + partition.isPartitioned());

// Create a representation for a non-partitioned table
StorageDescriptor tableSd = new StorageDescriptor();
tableSd.setLocation("hdfs://namenode:9000/warehouse/users");
tableSd.setInputFormat("org.apache.hadoop.mapred.TextInputFormat");

HiveTablePartition wholeTable = HiveTablePartition.ofTable(tableSd, tableParams);
System.out.println("Table location: " + wholeTable.getLocation());
System.out.println("Is partitioned: " + wholeTable.isPartitioned()); // false
```

### HivePartitionUtils

Utility class providing comprehensive partition management operations, including discovery, serialization, and metadata handling.

```java { .api }
/**
 * Utility class for Hive partition operations.
 * Provides methods for partition discovery, serialization, and metadata management.
 */
public class HivePartitionUtils {

    /**
     * Get all partitions for a table, including metadata and storage descriptors.
     * @param jobConf Hadoop JobConf with Hive and HDFS configuration
     * @param hiveVersion Hive version for compatibility
     * @param tablePath table path (database.table)
     * @param partitionColNames list of partition column names for validation
     * @param remainingPartitions list of remaining partition specifications to filter
     * @return list of all table partitions with complete metadata
     */
    public static List<HiveTablePartition> getAllPartitions(JobConf jobConf,
                                                            String hiveVersion,
                                                            ObjectPath tablePath,
                                                            List<String> partitionColNames,
                                                            List<Map<String, String>> remainingPartitions);

    /**
     * Serialize a list of HiveTablePartition objects to a list of byte arrays.
     * Used for efficient partition metadata transfer and caching.
     * @param partitions list of partitions to serialize
     * @return list of serialized byte arrays, one per partition
     */
    public static List<byte[]> serializeHiveTablePartition(List<HiveTablePartition> partitions);

    /**
     * Deserialize a list of byte arrays back to a list of HiveTablePartition objects.
     * @param partitionBytes list of serialized partition data
     * @return list of deserialized partitions
     */
    public static List<HiveTablePartition> deserializeHiveTablePartition(List<byte[]> partitionBytes);
}
```

**Usage Examples:**

```java
import org.apache.flink.connectors.hive.util.HivePartitionUtils;
import org.apache.hadoop.mapred.JobConf;

// Configure the Hadoop JobConf
JobConf jobConf = new JobConf();
jobConf.set("hive.metastore.uris", "thrift://metastore:9083");
jobConf.set("fs.defaultFS", "hdfs://namenode:9000");

// Get all partitions for a table
ObjectPath tablePath = new ObjectPath("sales", "transactions");
List<String> partitionColumns = Arrays.asList("year", "month", "day");

List<HiveTablePartition> allPartitions = HivePartitionUtils.getAllPartitions(
    jobConf,
    "2.3.9", // hiveVersion
    tablePath,
    partitionColumns,
    Collections.emptyList() // remainingPartitions
);

System.out.println("Found " + allPartitions.size() + " partitions");

// Filter partitions for a specific date range
List<HiveTablePartition> filteredPartitions = allPartitions.stream()
    .filter(p -> {
        Map<String, String> spec = p.getPartitionSpec();
        return "2024".equals(spec.get("year")) &&
               Integer.parseInt(spec.get("month")) >= 1 &&
               Integer.parseInt(spec.get("month")) <= 3;
    })
    .collect(Collectors.toList());

System.out.println("Q1 2024 partitions: " + filteredPartitions.size());

// Serialize partitions for caching
List<byte[]> serializedPartitions = HivePartitionUtils.serializeHiveTablePartition(filteredPartitions);
System.out.println("Serialized " + serializedPartitions.size() + " partitions");

// Deserialize partitions
List<HiveTablePartition> deserializedPartitions = HivePartitionUtils.deserializeHiveTablePartition(
    serializedPartitions
);

System.out.println("Deserialized " + deserializedPartitions.size() + " partitions");
```