diff --git a/.isort.cfg b/.isort.cfg index 7f11e51..25ea4f4 100644 --- a/.isort.cfg +++ b/.isort.cfg @@ -1,2 +1,2 @@ [settings] -known_third_party = black,click,jinja2,pytest,setuptools,sql_to_code +known_third_party = black,click,jinja2,pyparsing,pytest,setuptools,sql_to_code diff --git a/sql_to_code/parsers/alter_table/parser.py b/sql_to_code/parsers/alter_table/parser.py index 5c37e2a..e63d3fa 100644 --- a/sql_to_code/parsers/alter_table/parser.py +++ b/sql_to_code/parsers/alter_table/parser.py @@ -1,32 +1,33 @@ -import re +from pyparsing import CaselessKeyword, QuotedString from .models import ForeignKey, Reference -source_table_name_regex = re.compile('ALTER TABLE "(?P\w+)"') -foreign_key_name_regex = re.compile('ADD FOREIGN KEY \("(?P\w+)"\)') -result_table_name_regex = re.compile('REFERENCES "(?P\w+)"') -result_table_field_name_regex = re.compile( - 'REFERENCES "\w+" \("(?P\w+)"\);' +table_name_schema = CaselessKeyword("alter table") + QuotedString('"')("table_name") +foreign_key_schema = CaselessKeyword("add foreign key") + QuotedString( + '("', endQuoteChar='")' +)("foreign_key") +reference_table = CaselessKeyword("references") + QuotedString('"')("reference_table") +reference_table_column_name = QuotedString('("', endQuoteChar='")')( + "reference_table_column_name" +) + +add_foreign_key_schema = ( + table_name_schema + + foreign_key_schema + + reference_table + + reference_table_column_name ) def parse(sql_text: str): - source_table_name = source_table_name_regex.search(sql_text).groupdict()[ - "table_name" - ] - foreign_key_name = foreign_key_name_regex.search(sql_text).groupdict()[ - "foreign_key" - ] - result_table_name = result_table_name_regex.search(sql_text).groupdict()[ - "result_table_name" - ] - result_table_field_name = result_table_field_name_regex.search( - sql_text - ).groupdict()["result_table_field_name"] + result = add_foreign_key_schema.parseString(sql_text) return ForeignKey( - refer_from=Reference(table_name=source_table_name, field_name=foreign_key_name), + refer_from=Reference( + table_name=result.table_name, field_name=result.foreign_key + ), refer_to=Reference( - table_name=result_table_name, field_name=result_table_field_name + table_name=result.reference_table, + field_name=result.reference_table_column_name, ), ) diff --git a/sql_to_code/parsers/create_enum/parser.py b/sql_to_code/parsers/create_enum/parser.py index 8d31f3d..3365f08 100644 --- a/sql_to_code/parsers/create_enum/parser.py +++ b/sql_to_code/parsers/create_enum/parser.py @@ -1,13 +1,18 @@ -import re +from pyparsing import CaselessKeyword, QuotedString, delimitedList from .models import Enumeration -NAME_REGEX = re.compile('CREATE TYPE "(?P\w+)"') -VALUE_REGEX = re.compile("'(\w+)'") +enum_schema = ( + CaselessKeyword("create type") + + QuotedString('"')("enum_name") + + CaselessKeyword("as enum") + + CaselessKeyword("(") + + delimitedList(QuotedString("'"))("enum_values") + + CaselessKeyword(");") +) def parse(sql_text: str): - name = NAME_REGEX.search(sql_text).groupdict()["name"] - values = VALUE_REGEX.findall(sql_text) + result = enum_schema.parseString(sql_text) - return Enumeration(name, values) + return Enumeration(result.enum_name, list(result.enum_values)) diff --git a/sql_to_code/parsers/create_table/models.py b/sql_to_code/parsers/create_table/models.py index ce3db61..643bc59 100644 --- a/sql_to_code/parsers/create_table/models.py +++ b/sql_to_code/parsers/create_table/models.py @@ -1,5 +1,5 @@ from dataclasses import dataclass, field -from typing import List, Union +from typing import List, Optional, Union from ..alter_table.models import Reference @@ -10,11 +10,19 @@ class Attribute: name: str type: str - is_default: bool default: default_type - primary_key: bool - nullable: bool - foreign_key: Reference = field(default=None) + is_unique: bool + is_nullable: bool + is_primary_key: bool + foreign_key: Optional[Reference] = field(default=None) + + @property + def has_default(self): + return self.default is not None + + @property + def has_foreign_key(self): + return self.foreign_key is not None @dataclass diff --git a/sql_to_code/parsers/create_table/parser.py b/sql_to_code/parsers/create_table/parser.py index e09d5f5..9e57216 100644 --- a/sql_to_code/parsers/create_table/parser.py +++ b/sql_to_code/parsers/create_table/parser.py @@ -1,46 +1,77 @@ -import re -from typing import List +from pyparsing import ( + CaselessKeyword, + Group, + Literal, + MatchFirst, + OneOrMore, + Optional, + QuotedString, + Suppress, + Word, + alphanums, + alphas, +) +from pyparsing import pyparsing_common as ppc +from pyparsing import quotedString -from .models import Attribute, Table, default_type +from .models import Attribute, Table -regex_name_schema: str = r'CREATE TABLE "([\S\d]+)"\s*\(\s*(.+)\s*\);' -name_and_type_regex = r"(^[\S\d]+)\s([\S]+)\s*" -default_regex = r"DEFAULT\s+(.?)+\s*" +# table schema +create_table = CaselessKeyword("create table") +table_name = MatchFirst(QuotedString('"'))("table_name") +# field schema +column_name = QuotedString('"')("column_name") -def parse(sql_text: str) -> Table: - table_name, schema = re.findall(regex_name_schema, sql_text)[0] - all_attributes: List[str] = schema.split(",") - schema: List[str] = map( - lambda attribute: attribute.strip().replace('"', ""), all_attributes - ) - attributes = parse_attributes(schema) +# column type: +# "(" ")" - because of varchar(40) +# "_" - because of enums like process_type +column_type = Word(alphas, alphanums + "(" + ")" + "_")("column_type") +is_primary_key = CaselessKeyword("primary key") +is_unique = CaselessKeyword("unique")("is_unique") - return Table(table_name, attributes) +date_time_now = Word("now()") +default_value = quotedString | ppc.real | ppc.signed_integer | date_time_now +has_default = CaselessKeyword("default") + default_value("default_value") +is_not_null = CaselessKeyword("not null")("is_not_null") -def parse_attributes(schema) -> List[Attribute]: - attributes = list() - for attribute in schema: - name, a_type = re.findall(pattern=name_and_type_regex, string=attribute)[0] +table_columns = OneOrMore( + Group( + column_name + + column_type + + ( + Optional(is_not_null) + & Optional(has_default) + & Optional(is_unique) + & Optional(is_primary_key) + ) + + Optional(Suppress(",")) + ) +)("table_columns") - is_pk = "PRIMARY KEY" in attribute.upper() - is_default = not is_pk and "DEFAULT" in attribute.upper() +create_table_schema = ( + create_table + table_name + Literal("(") + table_columns + Literal(");") +) - attributes.append( - Attribute( - name=name, - type=a_type, - primary_key=is_pk, - nullable="NOT NULL" not in attribute.upper() and not is_pk, - is_default=is_default, - default=parse_default(attribute) if is_default else None, - ) - ) - return attributes +def parse(sql_text: str) -> Table: + result = create_table_schema.parseString(sql_text) + table = Table( + result.table_name, + [ + Attribute( + name=column.column_name, + type=column.column_type, + default=column.default_value if column.default_value else None, + is_unique=bool(column.is_unique), + is_nullable=not bool(column.is_not_null), + is_primary_key=bool(column.is_primary_key), + ) + for column in result.table_columns + ], + ) -def parse_default(sql_command: str) -> default_type: - return re.findall(default_regex, sql_command)[0] + return table diff --git a/sql_to_code/utils.py b/sql_to_code/utils.py index ac77c13..8fba549 100644 --- a/sql_to_code/utils.py +++ b/sql_to_code/utils.py @@ -11,7 +11,6 @@ def get_file_content(filename: str): def parse_commands(content: str) -> List[str]: - raw_commands = content.split("\n\n") - commands = [command.replace("\n", "") for command in raw_commands] + commands = content.split("\n\n") return commands diff --git a/tests/test_sql/test_schema_alter.sql b/tests/test_sql/test_schema_alter.sql index 1b83365..17d85df 100644 --- a/tests/test_sql/test_schema_alter.sql +++ b/tests/test_sql/test_schema_alter.sql @@ -1,3 +1,3 @@ ALTER TABLE "issue" - ADD FOREIGN KEY ("process_id") + ADD FOREIGN KEY ("process_id_test") REFERENCES "process" ("process_id"); diff --git a/tests/test_sql/test_schema_table.sql b/tests/test_sql/test_schema_table.sql index a20f60c..49f043f 100644 --- a/tests/test_sql/test_schema_table.sql +++ b/tests/test_sql/test_schema_table.sql @@ -1,7 +1,7 @@ CREATE TABLE "process" ( "process_id" int PRIMARY KEY, - "booking_id" int, - "ticket_id" int, - "created_at" timestamp, - "updated_at" timestamp + "booking_id" int DEFAULT 10, + "state" process_state DEFAULT "verification" NOT NULL, + "created_at" timestamp DEFAULT (now()), + "updated_at" timestamp, ); diff --git a/tests/unit/test_context.py b/tests/unit/test_context.py index 68e0405..9eb6788 100644 --- a/tests/unit/test_context.py +++ b/tests/unit/test_context.py @@ -11,50 +11,56 @@ Attribute( name="process_id", type="int", - is_default=False, default=None, - primary_key=True, - nullable=False, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="booking_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="ticket_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="state", type="process_state", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="created_at", type="timestamp", - is_default=True, - default="", - primary_key=False, - nullable=True, + default="(now())", + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="updated_at", type="timestamp", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), ], ), @@ -64,18 +70,19 @@ Attribute( name="issue_id", type="int", - is_default=False, default=None, - primary_key=True, - nullable=False, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="process_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, foreign_key=Reference( table_name="process", field_name="process_id" ), @@ -83,18 +90,20 @@ Attribute( name="type", type="issue_type", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="created_at", type="timestamp", - is_default=True, - default="", - primary_key=False, - nullable=True, + default="(now())", + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), ], ), @@ -104,35 +113,37 @@ Attribute( name="email", type="varchar", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="type", type="user_action_type", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="issue_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, foreign_key=Reference(table_name="issue", field_name="issue_id"), ), Attribute( name="process_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, foreign_key=Reference( table_name="process", field_name="process_id" ), @@ -140,10 +151,10 @@ Attribute( name="passenger_segment_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, foreign_key=Reference( table_name="passenger_segment", field_name="passenger_segment_id", @@ -152,10 +163,11 @@ Attribute( name="created_at", type="timestamp", - is_default=True, - default="", - primary_key=False, - nullable=True, + default="(now())", + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), ], ), @@ -165,70 +177,76 @@ Attribute( name="passenger_segment_id", type="int", - is_default=False, default=None, - primary_key=True, - nullable=False, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="process_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, foreign_key=Reference( - table_name="process", field_name="process_id", + table_name="process", field_name="process_id" ), ), Attribute( name="passenger_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="segment_id", type="varchar", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="state", type="passenger_segment_state", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="issue_id", type="int", - is_default=False, default=None, - primary_key=False, - nullable=True, - foreign_key=Reference(table_name="issue", field_name="issue_id",), + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=Reference(table_name="issue", field_name="issue_id"), ), Attribute( name="updated_at", type="timestamp", - is_default=False, default=None, - primary_key=False, - nullable=True, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), Attribute( name="created_at", type="timestamp", - is_default=True, - default="", - primary_key=False, - nullable=True, + default="(now())", + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, ), ], ), diff --git a/tests/unit/test_parse_file.py b/tests/unit/test_parse_file.py index b8d79d6..6a885fc 100644 --- a/tests/unit/test_parse_file.py +++ b/tests/unit/test_parse_file.py @@ -3,20 +3,20 @@ def test_parse_sql_commands() -> None: result_commands = [ - "CREATE TYPE \"process_state\" AS ENUM ( 'verification', 'assessment', 'processing');", - "CREATE TYPE \"issue_type\" AS ENUM ( 'could_not_verify', 'segment_not_available', 'priority_boarding_sold_out', 'schedule_change', 'invalid_pax_name', 'price_change', 'vpn_issues');", - "CREATE TYPE \"user_action_type\" AS ENUM ( 'send_email');", - "CREATE TYPE \"passenger_segment_state\" AS ENUM ( 'pending', 'issue', 'processed', 'not_processed');", - 'CREATE TABLE "process" ( "process_id" int PRIMARY KEY, "booking_id" int, "ticket_id" int, "state" process_state, "created_at" timestamp DEFAULT (now()), "updated_at" timestamp);', - 'CREATE TABLE "issue" ( "issue_id" int PRIMARY KEY, "process_id" int, "type" issue_type, "created_at" timestamp DEFAULT (now()));', - 'CREATE TABLE "user_action" ( "email" varchar, "type" user_action_type, "issue_id" int, "process_id" int, "passenger_segment_id" int, "created_at" timestamp DEFAULT (now()));', - 'CREATE TABLE "passenger_segment" ( "passenger_segment_id" int PRIMARY KEY, "process_id" int, "passenger_id" int, "segment_id" varchar, "state" passenger_segment_state, "issue_id" int, "updated_at" timestamp, "created_at" timestamp DEFAULT (now()));', + "CREATE TYPE \"process_state\" AS ENUM (\n 'verification',\n 'assessment',\n 'processing'\n);", + "CREATE TYPE \"issue_type\" AS ENUM (\n 'could_not_verify',\n 'segment_not_available',\n 'priority_boarding_sold_out',\n 'schedule_change',\n 'invalid_pax_name',\n 'price_change',\n 'vpn_issues'\n);", + "CREATE TYPE \"user_action_type\" AS ENUM (\n 'send_email'\n);", + "CREATE TYPE \"passenger_segment_state\" AS ENUM (\n 'pending',\n 'issue',\n 'processed',\n 'not_processed'\n);", + 'CREATE TABLE "process" (\n "process_id" int PRIMARY KEY,\n "booking_id" int,\n "ticket_id" int,\n "state" process_state,\n "created_at" timestamp DEFAULT (now()),\n "updated_at" timestamp\n);', + 'CREATE TABLE "issue" (\n "issue_id" int PRIMARY KEY,\n "process_id" int,\n "type" issue_type,\n "created_at" timestamp DEFAULT (now())\n);', + 'CREATE TABLE "user_action" (\n "email" varchar,\n "type" user_action_type,\n "issue_id" int,\n "process_id" int,\n "passenger_segment_id" int,\n "created_at" timestamp DEFAULT (now())\n);', + 'CREATE TABLE "passenger_segment" (\n "passenger_segment_id" int PRIMARY KEY,\n "process_id" int,\n "passenger_id" int,\n "segment_id" varchar,\n "state" passenger_segment_state,\n "issue_id" int,\n "updated_at" timestamp,\n "created_at" timestamp DEFAULT (now())\n);', 'ALTER TABLE "issue" ADD FOREIGN KEY ("process_id") REFERENCES "process" ("process_id");', 'ALTER TABLE "user_action" ADD FOREIGN KEY ("issue_id") REFERENCES "issue" ("issue_id");', 'ALTER TABLE "user_action" ADD FOREIGN KEY ("process_id") REFERENCES "process" ("process_id");', 'ALTER TABLE "user_action" ADD FOREIGN KEY ("passenger_segment_id") REFERENCES "passenger_segment" ("passenger_segment_id");', 'ALTER TABLE "passenger_segment" ADD FOREIGN KEY ("process_id") REFERENCES "process" ("process_id");', - 'ALTER TABLE "passenger_segment" ADD FOREIGN KEY ("issue_id") REFERENCES "issue" ("issue_id");', + 'ALTER TABLE "passenger_segment" ADD FOREIGN KEY ("issue_id") REFERENCES "issue" ("issue_id");\n', ] sql_text = get_file_content("tests/test_sql/test_schema.sql") diff --git a/tests/unit/test_parsers.py b/tests/unit/test_parsers.py index 4eae615..b9810a6 100644 --- a/tests/unit/test_parsers.py +++ b/tests/unit/test_parsers.py @@ -6,9 +6,9 @@ ForeignKey, ParserOutput, Reference, - create_table, + Table, ) -from sql_to_code.utils import get_file_content, parse_commands +from sql_to_code.utils import get_file_content sql_test_files_root = "tests/test_sql" @@ -28,10 +28,64 @@ f"{sql_test_files_root}/test_schema_alter.sql", parsers.alter_table.parser, ForeignKey( - refer_from=Reference(table_name="issue", field_name="process_id"), + refer_from=Reference(table_name="issue", field_name="process_id_test"), refer_to=Reference(table_name="process", field_name="process_id"), ), ), + ( + f"{sql_test_files_root}/test_schema_table.sql", + parsers.create_table.parser, + Table( + name="process", + schema=[ + Attribute( + name="process_id", + type="int", + default=None, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, + ), + Attribute( + name="booking_id", + type="int", + default=10, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, + ), + Attribute( + name="state", + type="process_state", + default='"verification"', + is_unique=False, + is_nullable=False, + is_primary_key=False, + foreign_key=None, + ), + Attribute( + name="created_at", + type="timestamp", + default="(now())", + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, + ), + Attribute( + name="updated_at", + type="timestamp", + default=None, + is_unique=False, + is_nullable=True, + is_primary_key=False, + foreign_key=None, + ), + ], + ), + ), ], ) def test_parsers(fixture_sql_filename, parser, expected_result: ParserOutput) -> None: @@ -39,22 +93,3 @@ def test_parsers(fixture_sql_filename, parser, expected_result: ParserOutput) -> result = parser.parse(sql_text) assert result == expected_result - - -def test_table_parser() -> None: - attributes = [ - Attribute("process_id", "int", False, None, True, False), - Attribute("booking_id", "int", False, None, False, True), - Attribute("ticket_id", "int", False, None, False, True), - Attribute("created_at", "timestamp", False, None, False, True), - Attribute("updated_at", "timestamp", False, None, False, True), - ] - - sql_text = get_file_content("tests/test_sql/test_schema_table.sql") - - commands = parse_commands(sql_text) - - table = create_table.parser.parse(commands[0]) - - assert table.name == "process" - assert table.schema == attributes