or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

builtins.md categorical.md contrasts.md high-level.md index.md matrix-building.md splines.md transforms.md utilities.md

docs/builtins.md

0

# Built-in Functions

1

2

Special functions available in formula namespaces for escaping arithmetic operations and handling variable names with special characters. These functions are automatically imported into the formula evaluation environment.

3

4

## Capabilities

5

6

### Identity Function

7

8

Escapes arithmetic operations from formula parsing, allowing complex expressions to be treated as single terms.

9

10

```python { .api }

11

def I(x):

12

"""

13

Identity function that returns its input unchanged.

14

15

The key purpose is to 'hide' arithmetic operations from Patsy's formula parser.

16

Since the parser ignores anything inside function call syntax, I() allows

17

complex expressions to be treated as single predictors.

18

19

Parameters:

20

- x: Any expression or value

21

22

Returns:

23

The input value unchanged

24

"""

25

```

26

27

#### Usage Examples

28

29

```python

30

import patsy

31

import pandas as pd

32

import numpy as np

33

34

data = pd.DataFrame({

35

'x1': [1, 2, 3, 4, 5],

36

'x2': [2, 4, 6, 8, 10],

37

'y': [3, 6, 9, 12, 15]

38

})

39

40

# Without I(): creates separate terms for x1 and x2

41

design1 = patsy.dmatrix("x1 + x2", data)

42

print(f"Without I(): {design1.shape[1]} columns") # 3 columns: intercept, x1, x2

43

44

# With I(): creates single term for the sum

45

design2 = patsy.dmatrix("I(x1 + x2)", data)

46

print(f"With I(): {design2.shape[1]} columns") # 2 columns: intercept, sum

47

48

# More complex expressions

49

design3 = patsy.dmatrix("I(x1**2) + I(x2**3)", data) # Polynomial terms

50

design4 = patsy.dmatrix("I(x1 * x2)", data) # Interaction as single term

51

design5 = patsy.dmatrix("I((x1 + x2) / 2)", data) # Arithmetic mean

52

53

# Complete model with I() functions

54

y, X = patsy.dmatrices("y ~ I(x1**2) + I(x1 * x2)", data)

55

print("Column names:", X.design_info.column_names)

56

```

57

58

### Variable Name Quoting

59

60

Allows reference to variable names that don't conform to Python identifier rules.

61

62

```python { .api }

63

def Q(name):

64

"""

65

Quote variable names, especially those that don't meet Python's variable name rules.

66

67

Takes a string containing a variable name and returns the value of that variable

68

from the evaluation environment. Useful for column names with special characters,

69

spaces, or reserved words.

70

71

Parameters:

72

- name (str): String containing the variable name to look up

73

74

Returns:

75

The value of the named variable from the evaluation environment

76

77

Raises:

78

NameError: If no variable with the given name is found

79

"""

80

```

81

82

#### Usage Examples

83

84

```python

85

import patsy

86

import pandas as pd

87

88

# Data with problematic column names

89

data = pd.DataFrame({

90

'weight.in.kg': [70, 80, 90, 75, 85],

91

'height in cm': [170, 180, 185, 175, 182],

92

'age-years': [25, 30, 35, 28, 32],

93

'class': [1, 2, 1, 2, 1], # 'class' is a Python reserved word

94

'y': [1, 2, 3, 4, 5]

95

})

96

97

# These would fail without Q():

98

# design = patsy.dmatrix("weight.in.kg", data) # Error: attribute access

99

# design = patsy.dmatrix("height in cm", data) # Error: parsed as the Python expression `height in cm` (a membership test on undefined names)

100

# design = patsy.dmatrix("class", data) # Error: 'class' is reserved word

101

102

# Use Q() to handle problematic names:

103

design1 = patsy.dmatrix('Q("weight.in.kg")', data)

104

design2 = patsy.dmatrix('Q("height in cm")', data)

105

design3 = patsy.dmatrix('Q("age-years")', data)

106

design4 = patsy.dmatrix('Q("class")', data)

107

108

# Multiple problematic variables

109

design_multi = patsy.dmatrix('Q("weight.in.kg") + Q("height in cm")', data)

110

111

# Complete model with quoted variables

112

y, X = patsy.dmatrices('y ~ Q("weight.in.kg") + Q("class")', data)

113

print("Column names:", X.design_info.column_names)

114

115

# Q() can be used in complex expressions

116

design_complex = patsy.dmatrix('I(Q("weight.in.kg") / Q("height in cm"))', data) # BMI-like ratio

117

```

118

119

## Advanced Usage Patterns

120

121

### Combining I() and Q()

122

123

```python

124

import patsy

125

import pandas as pd

126

import numpy as np

127

128

# Data with both problematic names and need for complex expressions

129

data = pd.DataFrame({

130

'var.1': [1, 2, 3, 4, 5],

131

'var.2': [2, 4, 6, 8, 10],

132

'weight in kg': [70, 75, 80, 85, 90],

133

'height in m': [1.7, 1.8, 1.75, 1.85, 1.82],

134

'y': [20, 25, 22, 28, 26]

135

})

136

137

# Combine Q() and I() for complex expressions with problematic names

138

bmi_design = patsy.dmatrix('I(Q("weight in kg") / Q("height in m")**2)', data)

139

interaction_design = patsy.dmatrix('I(Q("var.1") * Q("var.2"))', data)

140

polynomial_design = patsy.dmatrix('I(Q("var.1")**2) + I(Q("var.1")**3)', data)

141

142

# Complete model

143

y, X = patsy.dmatrices(

144

'y ~ Q("var.1") + Q("var.2") + I(Q("weight in kg") / Q("height in m")**2)',

145

data

146

)

147

```

148

149

### Formula String Quoting Considerations

150

151

```python

152

# Different ways to handle quotes in formulas with Q()

153

154

# Option 1: Single quotes around formula, double quotes in Q()

155

formula1 = 'y ~ Q("weight.in.kg")'

156

157

# Option 2: Double quotes around formula, single quotes in Q()

158

formula2 = "y ~ Q('weight.in.kg')"

159

160

# Option 3: Double quotes with escaped inner quotes

161

formula3 = "y ~ Q(\"weight.in.kg\")"

162

163

# Option 4: Triple quotes for complex formulas

164

formula4 = '''y ~ Q("weight.in.kg") + Q("height in cm")'''

165

166

# Options 1-3 spell the same formula; option 4 adds a second Q() term

167

designs = [patsy.dmatrix(f, data) for f in [formula1, formula2, formula3, formula4]]

168

```

169

170

### Working with Pandas Column Names

171

172

```python

173

import patsy

174

import pandas as pd

175

176

# Real-world example with messy column names

177

survey_data = pd.DataFrame({

178

'Q1. How satisfied are you?': [5, 4, 3, 5, 4],

179

'Income ($)': [50000, 60000, 45000, 70000, 55000],

180

'2023_score': [85, 90, 75, 95, 80],

181

'group-id': ['A', 'B', 'A', 'C', 'B'],

182

'outcome': [1, 2, 1, 3, 2]

183

})

184

185

# Use Q() for all problematic column names

186

design = patsy.dmatrix('''

187

Q("Q1. How satisfied are you?") +

188

Q("Income ($)") +

189

Q("2023_score") +

190

C(Q("group-id"))

191

''', survey_data)

192

193

print("Successfully created design matrix with problematic column names")

194

print("Column names:", design.design_info.column_names)

195

```

196

197

### Dynamic Variable Selection with Q()

198

199

```python

200

import patsy

201

202

# Programmatically build formulas with Q()

203

data = pd.DataFrame({

204

'var-1': [1, 2, 3], 'var-2': [4, 5, 6], 'var-3': [7, 8, 9],

205

'outcome': [10, 11, 12]

206

})

207

208

# List of problematic variable names

209

predictors = ['var-1', 'var-2', 'var-3']

210

211

# Build formula dynamically

212

quoted_predictors = [f'Q("{var}")' for var in predictors]

213

formula = 'outcome ~ ' + ' + '.join(quoted_predictors)

214

print(f"Dynamic formula: {formula}")

215

216

y, X = patsy.dmatrices(formula, data)

217

```

218

219

## Integration with Other Patsy Functions

220

221

### I() with Transformations

222

223

```python

224

# Combine I() with stateful transforms

225

design = patsy.dmatrix("standardize(I(x1 + x2))", data) # Standardize the sum

226

227

# I() with splines

228

design = patsy.dmatrix("bs(I(x1 * x2), df=4)", data) # Spline of interaction

229

```

230

231

### Q() with Categorical Variables

232

233

```python

234

# Categorical variables with problematic names

235

data_cat = pd.DataFrame({

236

'treatment-group': ['control', 'drug_a', 'drug_b'] * 10,

237

'patient.id': range(30),

238

'response': np.random.normal(0, 1, 30)

239

})

240

241

# Use Q() with C() for categorical specification

242

design = patsy.dmatrix('C(Q("treatment-group"))', data_cat)

243

y, X = patsy.dmatrices('response ~ C(Q("treatment-group"))', data_cat)

244

```

245

246

## Error Handling

247

248

### Common Q() Errors

249

250

```python

251

import patsy

252

253

# Variable doesn't exist

254

try:

255

design = patsy.dmatrix('Q("nonexistent_var")', data)

256

except NameError as e:

257

print(f"Variable not found: {e}")

258

259

# Typo in variable name

260

try:

261

design = patsy.dmatrix('Q("weight.in.kgg")', data) # Extra 'g'

262

except NameError as e:

263

print(f"Typo in variable name: {e}")

264

```

265

266

### Debugging Formula Issues

267

268

```python

269

# Check what variables are available

270

print("Available columns:", data.columns.tolist())

271

272

# Test Q() function directly

273

try:

274

test_value = patsy.Q("weight.in.kg") # Won't work outside formula context

275

except Exception as e:

276

print("Q() needs proper evaluation environment")

277

278

# Use in formula context

279

design = patsy.dmatrix('Q("weight.in.kg")', data) # Works correctly

280

```