or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

datasets.md evaluation.md index.md indexing.md java.md retrieval.md text-processing.md transformers.md utilities.md

docs/java.md

0

# Java Integration

1

2

PyTerrier's Java integration layer (`pt.java`) gives Python code access to the underlying Terrier information retrieval platform. It covers Java VM initialization and configuration, classpath management, and interoperability between Python and Java objects.

3

4

## Capabilities

5

6

### Java VM Initialization

7

8

Core functions for initializing and managing the Java Virtual Machine that runs the Terrier platform.

9

10

```python { .api }

11

def init(version: str = None, **kwargs) -> None:

12

"""

13

Initialize the Java VM and Terrier platform.

14

15

Parameters:

16

- version: Specific Terrier version to use

17

- **kwargs: Additional configuration options

18

"""

19

20

def legacy_init(*args, **kwargs) -> None:

21

"""

22

Legacy initialization function (deprecated).

23

Issues a deprecation warning and delegates to init().

24

"""

25

26

def started() -> bool:

27

"""

28

Check if the Java VM has been started.

29

30

Returns:

31

- True if Java VM is running, False otherwise

32

"""

33

34

def configure(**kwargs) -> None:

35

"""

36

Configure Java environment before initialization.

37

38

Parameters:

39

- **kwargs: Configuration options (memory, classpath, etc.)

40

"""

41

```

42

43

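**Usage Examples** (the examples on this page assume PyTerrier has been imported as `pt`, conventionally `import pyterrier as pt`):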

44

45

```python

46

# Basic initialization

47

if not pt.java.started():

48

pt.java.init()

49

50

# Initialize with specific version

51

pt.java.init(version='5.7')

52

53

# Check if already started

54

if pt.java.started():

55

print("Java VM is running")

56

57

# Configure before initialization

58

pt.java.configure(memory='4G', redirect_io=True)

59

pt.java.init()

60

```

61

62

### Memory and Performance Configuration

63

64

Functions for configuring JVM memory limits and performance settings.

65

66

```python { .api }

67

def set_memory_limit(memory: str) -> None:

68

"""

69

Set JVM memory limit (must be called before init).

70

71

Parameters:

72

- memory: Memory limit (e.g., '4G', '2048M', '1024m')

73

"""

74

75

def add_option(option: str) -> None:

76

"""

77

Add JVM command line option (must be called before init).

78

79

Parameters:

80

- option: JVM option (e.g., '-Xmx4G', '-XX:+UseG1GC')

81

"""

82

83

def set_java_home(java_home: str) -> None:

84

"""

85

Set JAVA_HOME path for JVM discovery.

86

87

Parameters:

88

- java_home: Path to Java installation directory

89

"""

90

```

91

92

**Usage Examples:**

93

94

```python

95

# Set memory limit before initialization

96

pt.java.set_memory_limit('8G')

97

pt.java.init()

98

99

# Add custom JVM options

100

pt.java.add_option('-XX:+UseG1GC')

101

pt.java.add_option('-Xmx4G')

102

pt.java.init()

103

104

# Set custom Java installation

105

pt.java.set_java_home('/usr/lib/jvm/java-11-openjdk')

106

```

107

108

### Classpath Management

109

110

Functions for managing Java classpath and adding external JAR files or Maven packages.

111

112

```python { .api }

113

def extend_classpath(paths: List[str]) -> None:

114

"""

115

Extend Java classpath with additional JAR files or directories.

116

117

Parameters:

118

- paths: List of paths to JAR files or directories

119

"""

120

121

def add_jar(jar_path: str) -> None:

122

"""

123

Add single JAR file to classpath.

124

125

Parameters:

126

- jar_path: Path to JAR file

127

"""

128

129

def add_package(package_spec: str) -> None:

130

"""

131

Add Maven package to classpath.

132

133

Parameters:

134

- package_spec: Maven coordinates (e.g., 'org.apache.lucene:lucene-core:8.11.1')

135

"""

136

```

137

138

**Usage Examples:**

139

140

```python

141

# Add external JAR files

142

pt.java.add_jar('/path/to/custom.jar')

143

pt.java.extend_classpath(['/path/to/lib1.jar', '/path/to/lib2.jar'])

144

145

# Add Maven packages

146

pt.java.add_package('org.apache.commons:commons-lang3:3.12.0')

147

pt.java.add_package('com.fasterxml.jackson.core:jackson-core:2.13.0')

148

149

pt.java.init() # Initialize after adding dependencies

150

```

151

152

### Java Class Access

153

154

Functions for accessing and interacting with Java classes and objects from Python.

155

156

```python { .api }

157

def autoclass(class_name: str) -> type:

158

"""

159

Automatically load Java class for use in Python.

160

161

Parameters:

162

- class_name: Fully qualified Java class name

163

164

Returns:

165

- Python wrapper for Java class

166

"""

167

168

def cast(java_object: Any, target_class: str) -> Any:

169

"""

170

Cast Java object to specific type.

171

172

Parameters:

173

- java_object: Java object to cast

174

- target_class: Target class name for casting

175

176

Returns:

177

- Cast Java object

178

"""

179

180

J: Any # Direct access to Java classes (J.java.lang.String, etc.)

181

182

class JavaClasses:

183

"""Registry for commonly used Java classes."""

184

```

185

186

**Usage Examples:**

187

188

```python

189

# Load Java classes

190

ArrayList = pt.java.autoclass('java.util.ArrayList')

191

HashMap = pt.java.autoclass('java.util.HashMap')

192

193

# Create Java objects

194

java_list = ArrayList()

195

java_map = HashMap()

196

197

# Direct class access

198

string_class = pt.java.J.java.lang.String

199

integer_class = pt.java.J.java.lang.Integer

200

201

# Type casting

202

casted_object = pt.java.cast(some_object, 'org.terrier.structures.Index')

203

```

204

205

### I/O and Logging Configuration

206

207

Functions for managing Java I/O redirection and logging levels.

208

209

```python { .api }

210

def redirect_stdouterr() -> None:

211

"""

212

Redirect Java stdout/stderr to Python stdout/stderr.

213

"""

214

215

def set_redirect_io(redirect: bool) -> None:

216

"""

217

Configure I/O redirection (must be called before init).

218

219

Parameters:

220

- redirect: Whether to redirect Java I/O to Python

221

"""

222

223

def set_log_level(level: str) -> None:

224

"""

225

Set Java logging level.

226

227

Parameters:

228

- level: Log level ('ERROR', 'WARN', 'INFO', 'DEBUG')

229

"""

230

```

231

232

**Usage Examples:**

233

234

```python

235

# Configure I/O redirection

236

pt.java.set_redirect_io(True)

237

pt.java.init()

238

239

# Set logging level

240

pt.java.set_log_level('WARN') # Reduce log verbosity

241

242

# Manually redirect output

243

pt.java.redirect_stdouterr()

244

```

245

246

### Utility Functions

247

248

Helper functions for Java integration and data conversion.

249

250

```python { .api }

251

def bytebuffer_to_array(bytebuffer: Any) -> bytes:

252

"""

253

Convert a Java ByteBuffer to a Python bytes object.

254

255

Parameters:

256

- bytebuffer: Java ByteBuffer object

257

258

Returns:

259

- Python bytes object

260

"""

261

262

def required() -> bool:

263

"""

264

Check if Java is required for current operations.

265

266

Returns:

267

- True if Java is required, False otherwise

268

"""

269

270

def required_raise() -> None:

271

"""

272

Raise an exception if Java is required but has not been started.

273

274

Raises:

275

- RuntimeError: If Java is required but not started

276

"""

277

```

278

279

### Parallel Initialization

280

281

Functions for parallel Java VM initialization and configuration.

282

283

```python { .api }

284

def parallel_init(*args, **kwargs) -> None:

285

"""

286

Initialize Java VM for parallel processing contexts.

287

288

Parameters:

289

- *args, **kwargs: Initialization parameters

290

"""

291

292

def parallel_init_args() -> Tuple[Any, ...]:

293

"""

294

Get arguments for parallel Java initialization.

295

296

Returns:

297

- Tuple of initialization arguments

298

"""

299

```

300

301

### Pre-initialization Hooks

302

303

Functions for registering callbacks that run before Java VM initialization.

304

305

```python { .api }

306

def before_init(callback: Callable[[], None]) -> None:

307

"""

308

Register callback to run before Java VM initialization.

309

310

Parameters:

311

- callback: Function to call before init

312

"""

313

```

314

315

**Usage Example:**

316

317

```python

318

# Register pre-initialization callback

319

def setup_custom_properties():

320

pt.terrier.set_property('custom.property', 'value')

321

322

pt.java.before_init(setup_custom_properties)

323

pt.java.init()

324

```

325

326

## Advanced Java Integration Patterns

327

328

### Custom Java Class Integration

329

330

```python

331

# Load custom Java classes

332

CustomRetriever = pt.java.autoclass('com.example.CustomRetriever')

333

custom_retriever = CustomRetriever()

334

335

# Use in PyTerrier pipeline

336

class CustomTransformer(pt.Transformer):

337

def __init__(self):

338

self.java_retriever = CustomRetriever()

339

340

def transform(self, topics):

341

# Use Java object in transformation

342

return self.java_retriever.retrieve(topics)

343

```

344

345

### Maven Dependency Management

346

347

```python

348

# Add multiple dependencies

349

dependencies = [

350

'org.apache.lucene:lucene-core:8.11.1',

351

'org.apache.lucene:lucene-analyzers-common:8.11.1',

352

'com.fasterxml.jackson.core:jackson-databind:2.13.0'

353

]

354

355

for dep in dependencies:

356

pt.java.add_package(dep)

357

358

pt.java.init()

359

```

360

361

### Memory-Optimized Configuration

362

363

```python

364

# Configure for large-scale processing

365

pt.java.set_memory_limit('16G')

366

pt.java.add_option('-XX:+UseG1GC')

367

pt.java.add_option('-XX:MaxGCPauseMillis=200')

368

pt.java.add_option('-XX:+DisableExplicitGC')

369

pt.java.set_redirect_io(False) # Reduce I/O overhead

370

pt.java.init()

371

```

372

373

## Types

374

375

```python { .api }

376

from typing import List, Any, Callable, Tuple, Optional

377

378

# Java integration types

379

JavaClass = type # Python wrapper for Java class

380

JavaObject = Any # Java object instance

381

ClassPath = List[str] # List of JAR files or directories

382

MavenCoordinate = str # Maven artifact coordinates

383

LogLevel = str # Java logging level

384

MemorySpec = str # Memory specification (e.g., '4G', '2048M')

385

InitCallback = Callable[[], None] # Pre-initialization callback

386

```