Simple DDL Parser to parse SQL & dialects like HQL, TSQL (MSSQL), Oracle, AWS Redshift, Snowflake, MySQL, PostgreSQL, etc ddl files to json/python dict with full information about columns: types, defaults, primary keys, etc.; sequences, alters, custom types & other entities from ddl.
—
Convert parsed DDL metadata back into formatted SQL DDL statements optimized for specific database platforms, with dialect-specific syntax and conventions. The output system transforms the structured metadata into properly formatted DDL that matches each database's requirements.
Dictionary mapping dialect names to output formatter classes for generating platform-specific DDL output.
supported_dialects: Dict[str, Any]

Available output modes include:

- "sql" - Generic SQL (default)
- "mysql" - MySQL dialect formatting
- "postgres" - PostgreSQL dialect formatting
- "mssql" - Microsoft SQL Server dialect formatting
- "oracle" - Oracle dialect formatting
- "hql" - Hive Query Language dialect formatting
- "bigquery" - Google BigQuery dialect formatting
- "redshift" - AWS Redshift dialect formatting
- "snowflake" - Snowflake dialect formatting
- "spark_sql" - Apache Spark SQL dialect formatting
- "ibm_db2" - IBM DB2 dialect formatting
- "athena" - AWS Athena dialect formatting
- "databricks" - Databricks dialect formatting
- "sqlite" - SQLite dialect formatting
- "vertica" - Vertica dialect formatting

Platform-specific output formatting classes that generate properly formatted DDL statements.
class Redshift:
"""AWS Redshift output formatter with Redshift-specific syntax."""
class SparkSQL:
"""Apache Spark SQL output formatter with Spark syntax."""
class MySQL:
"""MySQL output formatter with MySQL-specific syntax."""
class BigQuery:
"""Google BigQuery output formatter with BigQuery syntax."""
class MSSQL:
"""Microsoft SQL Server output formatter with T-SQL syntax."""
class Databricks:
"""Databricks output formatter with Databricks-specific syntax."""
class Sqlite:
"""SQLite output formatter with SQLite syntax limitations."""
class Vertica:
"""Vertica output formatter with Vertica-specific syntax."""
class IbmDB2:
"""IBM DB2 output formatter with DB2 syntax."""
class PostgreSQL:
"""PostgreSQL output formatter with PostgreSQL syntax."""
class Oracle:
"""Oracle output formatter with Oracle SQL syntax."""
class HQL:
"""Hive Query Language output formatter with HQL syntax."""
class Snowflake:
"""Snowflake output formatter with Snowflake syntax."""
class Athena:
"""AWS Athena output formatter with Athena/Presto syntax."""Clean up and format output data according to specified dialect requirements.
def dialects_clean_up(output_mode: str, table_data) -> Dict:
"""
Clean up output data based on specified dialect.
Parameters:
- output_mode (str): Target dialect for output formatting
- table_data: Parsed table metadata to format
Returns:
Dict: Cleaned and formatted table data for specified dialect
"""Core output functionality for data serialization and file operations.
def dump_data_to_file(table_name: str, dump_path: str, data: List[Dict]) -> None:
"""
Dump parsed data to JSON file.
Parameters:
- table_name (str): Name of table for filename
- dump_path (str): Directory path for output file
- data (List[Dict]): Parsed data to write
"""
class Output:
"""Output formatting class for data serialization."""from simple_ddl_parser import DDLParser
ddl = """
CREATE TABLE users (
id SERIAL PRIMARY KEY,
name VARCHAR(50) NOT NULL
);
"""
parser = DDLParser(ddl)
# Default SQL output
result = parser.run(output_mode="sql")
# PostgreSQL-specific output
pg_result = parser.run(output_mode="postgres")
# MySQL-specific output
mysql_result = parser.run(output_mode="mysql")

from simple_ddl_parser import DDLParser
parser = DDLParser(complex_ddl_content)
# Save formatted output to files
result = parser.run(
dump=True,
dump_path="./output_schemas",
output_mode="postgres"
)
# Creates JSON files in ./output_schemas/ directory

from simple_ddl_parser import DDLParser
parser = DDLParser(ddl_content)
# Get JSON string instead of Python dict
json_output = parser.run(
output_mode="bigquery",
json_dump=True
)
print(json_output)  # JSON formatted string

from simple_ddl_parser import DDLParser
parser = DDLParser(schema_with_tables_and_sequences)
# Group results by database object type
grouped_result = parser.run(
group_by_type=True,
output_mode="oracle"
)
# Returns: {"tables": [...], "sequences": [...], "indexes": [...]}from simple_ddl_parser.output.dialects import dialects_clean_up
# Apply dialect-specific formatting to parsed data
cleaned_data = dialects_clean_up("snowflake", raw_table_data)

# MySQL-specific formatting includes:
{
"table_name": "users",
"columns": [
{
"name": "id",
"type": "INT",
"auto_increment": True,
"primary_key": True
}
],
"engine": "InnoDB",
"charset": "utf8mb4"
}

# PostgreSQL-specific formatting includes:
{
"table_name": "users",
"columns": [
{
"name": "id",
"type": "SERIAL",
"primary_key": True
},
{
"name": "tags",
"type": "VARCHAR[]",
"size": 50
}
]
}

# BigQuery-specific formatting includes:
{
"table_name": "events",
"dataset": "analytics",
"project": "my-project",
"columns": [
{
"name": "user_data",
"type": "STRUCT",
"fields": [
{"name": "name", "type": "STRING"},
{"name": "age", "type": "INT64"}
]
}
],
"clustering": ["user_id"],
"partitioning": {"field": "date", "type": "DAY"}
}

Each output dialect handles its platform-specific syntax and formatting conventions, as described above.
Install with Tessl CLI
npx tessl i tessl/pypi-simple-ddl-parser