or run

npx @tessl/cli init
Log in

Version

Tile

Overview

Evals

Files

Files

docs

cli-interface.md configuration.md data-models.md index.md lineage-runner.md metadata-providers.md visualization-export.md

docs/data-models.md

0

# Data Models

1

2

Core data classes representing SQL entities like tables, columns, schemas, and subqueries. These models provide the foundation for lineage analysis and include support for complex SQL constructs like CTEs, subqueries, and cross-schema references.

3

4

## Capabilities

5

6

### Schema

7

8

Represents database schemas with support for default schema handling and cross-schema references.

9

10

```python { .api }

11

class Schema:

12

unknown: str = "<default>" # Class attribute for unknown schema

13

14

def __init__(self, name: Optional[str] = None):

15

"""

16

Create a schema object.

17

18

Parameters:

19

- name: schema name (optional, uses default if not provided)

20

"""

21

22

# raw_name: str # Instance attribute set in __init__

23

24

def __str__(self) -> str:

25

"""String representation of the schema"""

26

27

def __bool__(self) -> bool:

28

"""Check if schema is known (not the default unknown schema)"""

29

```

30

31

### Table

32

33

Represents database tables with schema qualification, alias support, and flexible name parsing.

34

35

```python { .api }

36

class Table:

37

def __init__(self, name: str, schema: Schema = Schema(), **kwargs):

38

"""

39

Create a table object.

40

41

Parameters:

42

- name: table name, optionally qualified (schema.table format)

43

- schema: Schema object (ignored if name is already qualified)

44

- alias: table alias (passed via kwargs)

45

"""

46

47

# schema: Schema # Instance attribute set in __init__

48

# raw_name: str # Instance attribute set in __init__

49

# alias: str # Instance attribute set in __init__

50

51

def __str__(self) -> str:

52

"""String representation as schema.table"""

53

54

@staticmethod

55

def of(table: Any) -> "Table":

56

"""Abstract factory method for creating Table from parser objects"""

57

```

58

59

### Column

60

61

Represents table columns with parent table relationships, source column tracking, and alias resolution.

62

63

```python { .api }

64

class Column:

65

def __init__(self, name: str, **kwargs):

66

"""

67

Create a column object.

68

69

Parameters:

70

- name: column name

71

- Additional attributes passed via kwargs

72

"""

73

74

# raw_name: str # Instance attribute set in __init__

75

# source_columns: List[Tuple[str, Optional[str]]] # Instance attribute set in __init__

76

# from_alias: bool # Instance attribute set in __init__

77

78

@property

79

def parent(self) -> Optional[Union[Path, Table, SubQuery]]:

80

"""Get the parent table, subquery, or path"""

81

82

@property

83

def parent_candidates(self) -> List[Union[Path, Table, SubQuery]]:

84

"""Get list of possible parent tables/subqueries"""

85

86

def to_source_columns(self, alias_mapping: Dict[str, Union[Path, Table, SubQuery]]) -> Set[Column]:

87

"""

88

Resolve source columns using alias mapping.

89

90

Parameters:

91

- alias_mapping: mapping of aliases to table/subquery objects

92

93

Returns:

94

Set of resolved source Column objects

95

"""

96

97

@staticmethod

98

def of(column: Any, **kwargs) -> "Column":

99

"""Abstract factory method for creating Column from parser objects"""

100

```

101

102

### SubQuery

103

104

Represents SQL subqueries with alias support and raw query preservation.

105

106

```python { .api }

107

class SubQuery:

108

def __init__(self, subquery: Any, subquery_raw: str, alias: Optional[str]):

109

"""

110

Create a subquery object.

111

112

Parameters:

113

- subquery: parsed subquery object

114

- subquery_raw: raw SQL string of the subquery

115

- alias: subquery alias (optional)

116

"""

117

118

# query: Any # Instance attribute set in __init__

119

# query_raw: str # Instance attribute set in __init__

120

# alias: str # Instance attribute set in __init__

121

122

@staticmethod

123

def of(subquery: Any, alias: Optional[str]) -> "SubQuery":

124

"""Abstract factory method for creating SubQuery from parser objects"""

125

```

126

127

### Path

128

129

Represents file paths and URIs for external data sources.

130

131

```python { .api }

132

class Path:

133

def __init__(self, uri: str):

134

"""

135

Create a path object.

136

137

Parameters:

138

- uri: file path or URI

139

"""

140

141

# uri: str # Instance attribute set in __init__

142

```

143

144

## Usage Examples

145

146

### Basic Table and Column Creation

147

148

```python

149

from sqllineage.core.models import Table, Column, Schema

150

151

# Create schema

152

analytics_schema = Schema("analytics")

153

154

# Create table with schema

155

customer_table = Table("customers", schema=analytics_schema)

156

print(customer_table) # analytics.customers

157

158

# Create table with qualified name

159

orders_table = Table("sales.orders")

160

print(orders_table.schema) # sales

161

print(orders_table.raw_name) # orders

162

163

# Create columns

164

customer_id = Column("customer_id")

165

order_total = Column("total_amount")

166

```

167

168

### Working with Aliases

169

170

```python

171

# Table with alias

172

customer_table = Table("customers", alias="c")

173

print(customer_table.alias) # c

174

175

# Check column alias sources

176

column = Column("customer_name")

177

if column.from_alias:

178

print("Column comes from table alias")

179

```

180

181

### Schema Handling

182

183

```python

184

# Default schema

185

default_schema = Schema()

186

print(bool(default_schema)) # False (unknown schema)

187

188

# Named schema

189

named_schema = Schema("production")

190

print(bool(named_schema)) # True

191

print(named_schema.raw_name) # production

192

```

193

194

### Subquery Representation

195

196

```python

197

# Subqueries are typically created by the parser

198

# but can be constructed manually for testing

199

subquery_sql = "(SELECT customer_id, COUNT(*) FROM orders GROUP BY customer_id)"

200

# subquery = SubQuery(parsed_query, subquery_sql, "order_counts")

201

```

202

203

### Complex Table Relationships

204

205

```python

206

# Multi-level schema qualification

207

# Some databases support database.schema.table format

208

try:

209

table = Table("prod_db.analytics.customer_summary")

210

print(f"Schema: {table.schema}, Table: {table.raw_name}")

211

except SQLLineageException as e:

212

print(f"Invalid table format: {e}")

213

```

214

215

### Column Lineage Tracking

216

217

```python

218

# Columns can track their source relationships

219

source_col = Column("customer_id")

220

target_col = Column("cust_id")

221

222

# Parent table assignment (typically done by parser)

223

source_col.parent = Table("raw.customers")

224

target_col.parent = Table("analytics.customer_summary")

225

226

print(f"Source: {source_col.parent}.{source_col.raw_name}")

227

print(f"Target: {target_col.parent}.{target_col.raw_name}")

228

```

229

230

### Working with File Paths

231

232

```python

233

# For SQL that references files (e.g., Spark, BigQuery)

234

data_path = Path("s3://data-lake/raw/customers.parquet")

235

print(data_path.uri) # s3://data-lake/raw/customers.parquet

236

237

# Local file paths

238

local_path = Path("/data/exports/customer_data.csv")

239

print(local_path.uri) # /data/exports/customer_data.csv

240

```