or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

classification.md · clustering.md · data-handling.md · distance.md · evaluation.md · index.md · preprocessing.md · projection.md · regression.md · widgets.md

docs/data-handling.md

0

# Data Handling and I/O

1

2

Orange3's data handling system provides comprehensive functionality for loading, manipulating, and transforming datasets. The core data structure is the Table class, which combines data storage with metadata through the Domain system.

3

4

## Capabilities

5

6

### Table Operations

7

8

The Table class is Orange3's primary data structure, representing datasets with features, target variables, and metadata.

9

10

```python { .api }

11

class Table:

12

"""

13

Data table with Orange-specific data structures.

14

15

Use factory methods for creating Table instances:

16

- Table.from_file() for loading from files

17

- Table.from_domain() for creating empty tables

18

- Table.from_table() for transforming existing tables

19

- Table.from_numpy() for creating from arrays

20

"""

21

@classmethod

22

def from_file(cls, filename, **kwargs):

23

"""Load table from file (recommended way to load data)."""

24

25

@classmethod

26

def from_domain(cls, domain, n_rows=0, weights=False):

27

"""Create empty table with given domain and number of rows."""

28

29

@classmethod

30

def from_table(cls, domain, source, row_indices=...):

31

"""Create table from selected columns/rows of existing table."""

32

33

@classmethod

34

def from_numpy(cls, domain, X, Y=None, metas=None, **kwargs):

35

"""Create table from numpy arrays."""

36

37

@classmethod

38

def from_url(cls, url, **kwargs):

39

"""Load table from URL."""

40

41

def save(self, filename):

42

"""Save table to file."""

43

44

def copy(self):

45

"""Create a copy of the table."""

46

47

def transform(self, domain):

48

"""Transform table to match new domain."""

49

50

def select_rows(self, row_indices):

51

"""Select specific rows by indices."""

52

53

def get_column_view(self, column):

54

"""Get column data as numpy array."""

55

56

@property

57

def X(self):

58

"""Feature data as numpy array."""

59

60

@property

61

def Y(self):

62

"""Target data as numpy array."""

63

64

@property

65

def metas(self):

66

"""Meta attribute data."""

67

68

@property

69

def domain(self):

70

"""Domain defining table structure."""

71

```

72

73

### Domain Management

74

75

Domain objects define the structure and metadata of datasets, including variable types and relationships.

76

77

```python { .api }

78

class Domain:

79

"""

80

Dataset structure definition.

81

82

Args:

83

attributes: List of feature variables

84

class_vars: List of target variables

85

metas: List of meta variables

86

"""

87

def __init__(self, attributes, class_vars=None, metas=None): ...

88

89

def select_columns(self, columns):

90

"""Create new domain with selected columns."""

91

92

@property

93

def variables(self):

94

"""All variables in the domain."""

95

96

@property

97

def attributes(self):

98

"""Feature variables."""

99

100

@property

101

def class_vars(self):

102

"""Target variables."""

103

104

@property

105

def metas(self):

106

"""Meta variables."""

107

```

108

109

### Variable Types

110

111

Orange3 supports different variable types for various data formats and analysis needs.

112

113

```python { .api }

114

class Variable:

115

"""Base class for all variable types."""

116

def __init__(self, name="", compute_value=None): ...

117

118

@property

119

def name(self):

120

"""Variable name."""

121

122

def copy(self, compute_value=None):

123

"""Create copy of variable."""

124

125

class ContinuousVariable(Variable):

126

"""Numeric variable for continuous values."""

127

def __init__(self, name="", number_of_decimals=None, compute_value=None, *, sparse=False): ...

128

129

class DiscreteVariable(Variable):

130

"""Categorical variable with finite set of values."""

131

def __init__(self, name="", values=(), ordered=False, compute_value=None, *, sparse=False): ...

132

133

@property

134

def values(self):

135

"""List of possible categorical values."""

136

137

class StringVariable(Variable):

138

"""Text-based variable."""

139

def __init__(self, name): ...

140

141

class TimeVariable(ContinuousVariable):

142

"""Time/datetime variable."""

143

def __init__(self, name, have_date=False, have_time=False): ...

144

```

145

146

### File I/O Operations

147

148

Support for various file formats and data sources.

149

150

```python { .api }

151

class FileFormat:

152

"""Base class for file format handlers."""

153

EXTENSIONS = ()

154

DESCRIPTION = ""

155

156

@classmethod

157

def read(cls, filename):

158

"""Read data from file."""

159

160

@classmethod

161

def write(cls, filename, data):

162

"""Write data to file."""

163

164

def get_sample_datasets_dir():

165

"""

166

Get path to Orange's sample datasets directory.

167

168

Returns:

169

str: Path to datasets directory

170

"""

171

```

172

173

### Data Filtering

174

175

Comprehensive filtering system for data selection and manipulation.

176

177

```python { .api }

178

class Values:

179

"""Filter data based on variable values."""

180

def __init__(self, conditions): ...

181

182

def __call__(self, data):

183

"""Apply filter to data."""

184

185

class Random:

186

"""Random sampling filter."""

187

def __init__(self, prob=0.5, stratified=False): ...

188

189

def __call__(self, data):

190

"""Apply random sampling."""

191

192

class IsDefined:

193

"""Filter rows with defined (non-missing) values."""

194

def __init__(self, columns=None, negate=False): ...

195

196

def __call__(self, data):

197

"""Filter defined values."""

198

199

class SameValue:

200

"""Filter rows where column has same value."""

201

def __init__(self, column, value): ...

202

203

def __call__(self, data):

204

"""Apply same value filter."""

205

```

206

207

### Data Conversion and Compatibility

208

209

Conversion between Orange tables and pandas DataFrames, for interoperability with the wider data science ecosystem.

210

211

```python { .api }

212

def table_from_frame(df, *, force_nominal=False, **kwargs):

213

"""

214

Convert pandas DataFrame to Orange Table.

215

216

Args:

217

df: pandas DataFrame

218

force_nominal: Force string variables to be nominal

219

220

Returns:

221

Table: Converted Orange table

222

"""

223

224

def table_to_frame(table, include_metas=True):

225

"""

226

Convert Orange Table to pandas DataFrame.

227

228

Args:

229

table: Orange Table

230

include_metas: Include meta attributes

231

232

Returns:

233

DataFrame: Converted pandas DataFrame

234

"""

235

```

236

237

### Usage Examples

238

239

```python

240

# Load data from file

241

data = Table("iris.tab")

242

print(f"Dataset shape: {data.X.shape}")

243

print(f"Features: {[var.name for var in data.domain.attributes]}")

244

245

# Create custom domain

246

from Orange.data import ContinuousVariable, DiscreteVariable, Domain

247

age = ContinuousVariable("age")

248

gender = DiscreteVariable("gender", values=["M", "F"])

249

income = ContinuousVariable("income")

250

domain = Domain([age, income], gender)

251

252

# Create table from arrays

253

import numpy as np

254

X = np.random.rand(100, 2)

255

Y = np.random.choice([0, 1], 100)

256

custom_data = Table.from_numpy(domain, X, Y)

257

258

# Filter data

259

from Orange.data import Values, IsDefined

260

filtered_data = Values([

261

Values.GE(data.domain["sepal length"], 5.0)

262

])(data)

263

264

# Convert to/from pandas

265

import pandas as pd

266

df = table_to_frame(data)

267

back_to_table = table_from_frame(df)

268

```