or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

catalog.mdconfiguration.mddata-source.mdfunctions.mdindex.mdtable-api.md

functions.mddocs/

0

# Function Module

1

2

Integration with Hive built-in functions through Flink's module system, enabling access to hundreds of Hive functions within Flink SQL queries and providing seamless function compatibility between Hive and Flink.

3

4

## Capabilities

5

6

### HiveModule

7

8

Main module class that provides access to Hive built-in functions within Flink.

9

10

```java { .api }

11

/**

12

* Module providing Hive built-in functions to Flink

13

*/

14

public class HiveModule implements Module {

15

16

/**

17

* List all available Hive functions

18

* @return Set of function names

19

*/

20

public Set<String> listFunctions();

21

22

/**

23

* Get function definition by name

24

* @param name - Function name

25

* @return Optional function definition

26

*/

27

public Optional<FunctionDefinition> getFunctionDefinition(String name);

28

29

/**

30

* Get Hive version used by this module

31

* @return Hive version string

32

*/

33

public String getHiveVersion();

34

}

35

```

36

37

**Usage Examples:**

38

39

```java

40

import org.apache.flink.table.api.TableEnvironment;

41

import org.apache.flink.table.module.hive.HiveModule;

42

43

// Create table environment

44

TableEnvironment tableEnv = TableEnvironment.create(settings);

45

46

// Load HiveModule to access Hive functions

47

tableEnv.loadModule("hive", new HiveModule());

48

49

// Use Hive functions in SQL queries

50

tableEnv.executeSql(

51

"SELECT " +

52

" regexp_replace(name, '[0-9]', '') as clean_name, " + // Hive regex function

53

" from_unixtime(timestamp) as formatted_time, " + // Hive date function

54

" size(split(tags, ',')) as tag_count " + // Hive array functions

55

"FROM my_table"

56

);

57

58

// Use Hive aggregate functions

59

tableEnv.executeSql(

60

"SELECT " +

61

" department, " +

62

" percentile_approx(salary, 0.5) as median_salary, " + // Hive percentile

63

" collect_set(role) as unique_roles " + // Hive collection function

64

"FROM employees " +

65

"GROUP BY department"

66

);

67

```

68

69

### HiveModuleFactory

70

71

Factory for creating HiveModule instances from configuration properties.

72

73

```java { .api }

74

/**

75

* Factory for creating HiveModule instances

76

*/

77

public class HiveModuleFactory implements ModuleFactory {

78

79

/**

80

* Create HiveModule from configuration

81

* @param properties - Configuration properties

82

* @return HiveModule instance

83

*/

84

public Module createModule(Map<String, String> properties);

85

86

/**

87

* Get factory identifier

88

* @return Factory identifier string

89

*/

90

public String factoryIdentifier();

91

92

/**

93

* Get required configuration options

94

* @return Set of required options

95

*/

96

public Set<ConfigOption<?>> requiredOptions();

97

98

/**

99

* Get optional configuration options

100

* @return Set of optional options

101

*/

102

public Set<ConfigOption<?>> optionalOptions();

103

}

104

```

105

106

### Configuration Options

107

108

Configuration options for HiveModule behavior and version specification.

109

110

```java { .api }

111

/**

112

* Configuration options for HiveModule

113

*/

114

@PublicEvolving

115

public class HiveModuleOptions {

116

117

/** Hive version for module functionality */

118

public static final ConfigOption<String> HIVE_VERSION;

119

}

120

```

121

122

### SQL Parser Integration

123

124

Hive SQL dialect support through Flink's parser factory system.

125

126

```java { .api }

127

/**

128

* Parser factory for Hive SQL dialect support

129

*/

130

public class HiveParserFactory implements ParserFactory {

131

132

/**

133

* Create parser instance for Hive SQL dialect

134

* @return Parser instance

135

*/

136

public Parser create();

137

138

/**

139

* Get factory identifier

140

* @return Factory identifier string

141

*/

142

public String factoryIdentifier();

143

144

/**

145

* Get required configuration options

146

* @return Set of required options

147

*/

148

public Set<ConfigOption<?>> requiredOptions();

149

150

/**

151

* Get optional configuration options

152

* @return Set of optional options

153

*/

154

public Set<ConfigOption<?>> optionalOptions();

155

}

156

```

157

158

## Available Function Categories

159

160

### String Functions

161

162

Hive provides comprehensive string manipulation functions:

163

164

```sql

165

-- Pattern matching and replacement

166

SELECT regexp_replace(text, 'pattern', 'replacement') FROM table;

167

SELECT regexp_extract(text, 'pattern', group_index) FROM table;

168

169

-- String manipulation

170

SELECT concat_ws(',', col1, col2, col3) FROM table;

171

SELECT split(text, ',') FROM table;

172

SELECT trim(text) FROM table;

173

174

-- Case conversion

175

SELECT upper(text), lower(text) FROM table;

176

```

177

178

### Date and Time Functions

179

180

Access to Hive's date and time functions:

181

182

```sql

183

-- Date formatting and parsing

184

SELECT from_unixtime(timestamp_col) FROM table;

185

SELECT unix_timestamp(date_string, 'yyyy-MM-dd') FROM table;

186

187

-- Date arithmetic

188

SELECT date_add(date_col, 30) FROM table;

189

SELECT datediff(end_date, start_date) FROM table;

190

191

-- Date extraction

192

SELECT year(date_col), month(date_col), day(date_col) FROM table;

193

```

194

195

### Mathematical Functions

196

197

Comprehensive mathematical operations:

198

199

```sql

200

-- Aggregation functions

201

SELECT percentile_approx(value, 0.5) FROM table;

202

SELECT stddev_pop(value), variance(value) FROM table;

203

204

-- Mathematical operations

205

SELECT round(value, 2), ceil(value), floor(value) FROM table;

206

SELECT abs(value), pow(base, exponent) FROM table;

207

```

208

209

### Collection Functions

210

211

Array and map manipulation functions:

212

213

```sql

214

-- Array functions

215

SELECT size(array_col) FROM table;

216

SELECT array_contains(array_col, 'value') FROM table;

217

SELECT sort_array(array_col) FROM table;

218

219

-- Map functions

220

SELECT map_keys(map_col), map_values(map_col) FROM table;

221

SELECT map_size(map_col) FROM table;

222

223

-- Collection aggregation

224

SELECT collect_list(col), collect_set(col) FROM table GROUP BY key;

225

```

226

227

### Conditional Functions

228

229

Control flow and conditional logic:

230

231

```sql

232

-- Conditional expressions

233

SELECT if(condition, true_value, false_value) FROM table;

234

SELECT coalesce(col1, col2, 'default') FROM table;

235

236

-- Case expressions with Hive extensions

237

SELECT case

238

when col > 100 then 'high'

239

when col > 50 then 'medium'

240

else 'low'

241

end FROM table;

242

```

243

244

## Advanced Usage

245

246

### Module Loading and Configuration

247

248

Configure HiveModule with specific versions and options:

249

250

```java

251

// Load HiveModule with specific version

252

Map<String, String> moduleProperties = new HashMap<>();

253

moduleProperties.put("hive-version", "3.1.2");

254

255

HiveModuleFactory factory = new HiveModuleFactory();

256

Module hiveModule = factory.createModule(moduleProperties);

257

tableEnv.loadModule("hive", hiveModule);

258

259

// Use modules with priority

260

tableEnv.loadModule("hive", new HiveModule());

261

tableEnv.loadModule("core", CoreModule.INSTANCE);

262

263

// List loaded modules

264

String[] modules = tableEnv.listModules();

265

```

266

267

### Function Resolution Order

268

269

Understand how Flink resolves functions when multiple modules are loaded:

270

271

```java

272

// Modules are resolved in load order

273

tableEnv.loadModule("core", CoreModule.INSTANCE); // First priority

274

tableEnv.loadModule("hive", new HiveModule()); // Second priority

275

276

// Explicitly use Hive functions when name conflicts exist

277

tableEnv.executeSql("SELECT hive.size(array_col) FROM table");

278

```

279

280

### Custom Function Integration

281

282

Combine Hive functions with custom UDFs:

283

284

```java

285

// Register custom UDF

286

tableEnv.createTemporarySystemFunction("my_func", MyCustomUDF.class);

287

288

// Use both Hive and custom functions

289

tableEnv.executeSql(

290

"SELECT " +

291

" my_func(col1) as custom_result, " +

292

" regexp_replace(col2, 'pattern', 'replacement') as hive_result " +

293

"FROM table"

294

);

295

```

296

297

### Performance Considerations

298

299

Optimize function usage for better performance:

300

301

```sql

302

-- Use Hive functions for complex string operations

303

SELECT regexp_replace(large_text, complex_pattern, replacement) FROM large_table;

304

305

-- Leverage Hive's optimized aggregate functions

306

SELECT percentile_approx(value, array(0.25, 0.5, 0.75)) FROM table GROUP BY key;

307

308

-- Use collect functions for data restructuring

309

SELECT key, collect_list(struct(col1, col2, col3)) as nested_data

310

FROM table GROUP BY key;

311

```