From b3d3eafd1c0904fc05a4c8497ac0ce0be7e0b54c Mon Sep 17 00:00:00 2001
From: Korawich Anuttra <korawich.anu@gmail.com>
Date: Sun, 16 Mar 2025 22:28:12 +0700
Subject: [PATCH 1/4] :page_facing_up: docs: update readme for usage of
 data-quality.

---
 README.md                | 24 ++++++++++++++++++++++++
 src/sqlplate/__init__.py |  1 +
 2 files changed, 25 insertions(+)

diff --git a/README.md b/README.md
index efc6ef3..b305313 100644
--- a/README.md
+++ b/README.md
@@ -40,6 +40,8 @@ pip install -U sqlplate
 
 ## :fork_and_knife: Usage
 
+### Generate SQL template
+
 Start passing option parameters before generate the Delta ETL SQL statement that
 will use on the Azure Databricks service.
 
@@ -108,6 +110,28 @@ WHEN NOT MATCHED THEN INSERT
 ;
 ```
 
+### Data Quality
+
+> [!IMPORTANT]
+> This feature does not support yet!!!
+
+```python
+from sqlplate import SQLity, Condition
+
+report: str = (
+    SQLity.format('databricks')
+    .template('quality')
+    .option('catalog', 'catalog-name')
+    .option('schema', 'schema-name')
+    .option('table', 'table-name')
+    .check('unique', Condition(cols=['pk_col'], rule="unique"))
+    .check('not-null', Condition(cols=['col01', 'col02'], rule="not-null"))
+    .check('row-count', Contition(rule="count"))
+    .validate(output='html')
+)
+print(report.strip().strip('\n'))
+```
+
 ## :chains: Support Systems
 
 | System             | Progress Status | System Integration Test | Remark                            |
diff --git a/src/sqlplate/__init__.py b/src/sqlplate/__init__.py
index 73cf028..e208322 100644
--- a/src/sqlplate/__init__.py
+++ b/src/sqlplate/__init__.py
@@ -1 +1,2 @@
 from .sqlplate import SQLPlate
+from .sqlity import SQLity

From 9e1111eff78d0a11635ccee258a7e5a6110c2026 Mon Sep 17 00:00:00 2001
From: Korawich Anuttra <korawich.anu@gmail.com>
Date: Sun, 16 Mar 2025 22:40:47 +0700
Subject: [PATCH 2/4] :gear: fixed: remove usage pattern of data-quality.

---
 README.md                | 13 +++++++------
 src/sqlplate/__init__.py |  1 -
 src/sqlplate/rules.py    | 10 ++++++++++
 src/sqlplate/sqlity.py   | 10 ----------
 src/sqlplate/sqlplate.py | 23 ++++++++++++++++++++++-
 5 files changed, 39 insertions(+), 18 deletions(-)
 create mode 100644 src/sqlplate/rules.py
 delete mode 100644 src/sqlplate/sqlity.py

diff --git a/README.md b/README.md
index b305313..82efa46 100644
--- a/README.md
+++ b/README.md
@@ -116,17 +116,18 @@ WHEN NOT MATCHED THEN INSERT
 > This feature does not support yet!!!
 
 ```python
-from sqlplate import SQLity, Condition
+from sqlplate import SQLPlate
+from sqlplate.rules import Unique, NotNull, Count
 
 report: str = (
-    SQLity.format('databricks')
-    .template('quality')
+    SQLPlate.format('databricks')
+    .quality(mode="pushdown")
     .option('catalog', 'catalog-name')
     .option('schema', 'schema-name')
     .option('table', 'table-name')
-    .check('unique', Condition(cols=['pk_col'], rule="unique"))
-    .check('not-null', Condition(cols=['col01', 'col02'], rule="not-null"))
-    .check('row-count', Contition(rule="count"))
+    .check('unique', Unique(cols=['pk_col']))
+    .check('not-null', NotNull(cols=['col01', 'col02']))
+    .check('row-count', Count())
     .validate(output='html')
 )
 print(report.strip().strip('\n'))
diff --git a/src/sqlplate/__init__.py b/src/sqlplate/__init__.py
index e208322..73cf028 100644
--- a/src/sqlplate/__init__.py
+++ b/src/sqlplate/__init__.py
@@ -1,2 +1 @@
 from .sqlplate import SQLPlate
-from .sqlity import SQLity
diff --git a/src/sqlplate/rules.py b/src/sqlplate/rules.py
new file mode 100644
index 0000000..6c2f025
--- /dev/null
+++ b/src/sqlplate/rules.py
@@ -0,0 +1,10 @@
+class BaseRule: ...
+
+
+class Unique(BaseRule): ...
+
+
+class NotNull(BaseRule): ...
+
+
+class Count(BaseRule): ...
diff --git a/src/sqlplate/sqlity.py b/src/sqlplate/sqlity.py
deleted file mode 100644
index 76d6ad7..0000000
--- a/src/sqlplate/sqlity.py
+++ /dev/null
@@ -1,10 +0,0 @@
-# ------------------------------------------------------------------------------
-# Copyright (c) 2022 Korawich Anuttra. All rights reserved.
-# Licensed under the MIT License. See LICENSE in the project root for
-# license information.
-# ------------------------------------------------------------------------------
-from __future__ import annotations
-
-
-class SQLity:
-    """A SQLity object for render data quality report by Jinja template."""
diff --git a/src/sqlplate/sqlplate.py b/src/sqlplate/sqlplate.py
index 9ab5f33..1be5ed7 100644
--- a/src/sqlplate/sqlplate.py
+++ b/src/sqlplate/sqlplate.py
@@ -6,7 +6,7 @@
 from __future__ import annotations
 
 from pathlib import Path
-from typing import Any, Iterator, Optional, Callable
+from typing import Any, Iterator, Optional, Callable, Literal
 
 from jinja2 import Template
 
@@ -83,6 +83,9 @@ def template(self, name: str) -> 'SQLPlate':
         )
         return self
 
+    def quality(self, mode: Literal["pushdown", "memory"]) -> 'SQLPlate':
+        return self
+
     def option(self, key: str, value: Any) -> 'SQLPlate':
         """Pass an option key-value pair before generate template."""
         self._option[key] = value
@@ -144,3 +147,21 @@ def stream(
             )
             if trim(s) != ''
         )
+
+    def check(
+        self,
+        name: str,
+        rule: Any,
+    ) -> 'SQLPlate':
+        return self
+
+    def validate(
+        self,
+        output: Literal["json", "html"],
+    ):
+        return self
+
+    def filter(
+        self,
+    ):
+        return self

From a4b4a590bf9c1cde2a6de54ef4a3490f1d4ec899 Mon Sep 17 00:00:00 2001
From: Korawich Anuttra <korawich.anu@gmail.com>
Date: Wed, 19 Mar 2025 21:58:29 +0700
Subject: [PATCH 3/4] :gear: fixed: change way to get dq.

---
 README.md                              | 24 +++++++++++++++---------
 src/sqlplate/rules.py                  | 10 ----------
 src/sqlplate/sqlplate.py               | 18 ------------------
 templates/databricks/quality.check.sql | 19 +++++++++++++++++++
 tests/test_databricks.py               | 16 ++++++++++++++++
 5 files changed, 50 insertions(+), 37 deletions(-)
 delete mode 100644 src/sqlplate/rules.py
 create mode 100644 templates/databricks/quality.check.sql

diff --git a/README.md b/README.md
index 82efa46..3064444 100644
--- a/README.md
+++ b/README.md
@@ -69,7 +69,7 @@ print(statement.strip().strip('\n'))
 
 The result SQL statement:
 
-```text
+```sql
 MERGE INTO catalog-name.schema-name.table-name AS target
 USING (
     WITH change_query AS (
@@ -117,20 +117,26 @@ WHEN NOT MATCHED THEN INSERT
 
 ```python
 from sqlplate import SQLPlate
-from sqlplate.rules import Unique, NotNull, Count
 
-report: str = (
+statement: str = (
     SQLPlate.format('databricks')
-    .quality(mode="pushdown")
+    .template('quality.check')
     .option('catalog', 'catalog-name')
     .option('schema', 'schema-name')
     .option('table', 'table-name')
-    .check('unique', Unique(cols=['pk_col']))
-    .check('not-null', NotNull(cols=['col01', 'col02']))
-    .check('row-count', Count())
-    .validate(output='html')
+    .option('filter', "load_date >= to_timestamp('20250201', 'yyyyMMdd')")
+    .option('unique', ['pk_col'])
+    .option('notnull', ['col01', 'col02'])
+    .option('row_count', True)
+    .load()
 )
-print(report.strip().strip('\n'))
+print(statement.strip().strip('\n'))
+```
+
+The result SQL statement:
+
+```sql
+
 ```
 
 ## :chains: Support Systems
diff --git a/src/sqlplate/rules.py b/src/sqlplate/rules.py
deleted file mode 100644
index 6c2f025..0000000
--- a/src/sqlplate/rules.py
+++ /dev/null
@@ -1,10 +0,0 @@
-class BaseRule: ...
-
-
-class Unique(BaseRule): ...
-
-
-class NotNull(BaseRule): ...
-
-
-class Count(BaseRule): ...
diff --git a/src/sqlplate/sqlplate.py b/src/sqlplate/sqlplate.py
index 1be5ed7..b2f3565 100644
--- a/src/sqlplate/sqlplate.py
+++ b/src/sqlplate/sqlplate.py
@@ -147,21 +147,3 @@ def stream(
             )
             if trim(s) != ''
         )
-
-    def check(
-        self,
-        name: str,
-        rule: Any,
-    ) -> 'SQLPlate':
-        return self
-
-    def validate(
-        self,
-        output: Literal["json", "html"],
-    ):
-        return self
-
-    def filter(
-        self,
-    ):
-        return self
diff --git a/templates/databricks/quality.check.sql b/templates/databricks/quality.check.sql
new file mode 100644
index 0000000..1473f24
--- /dev/null
+++ b/templates/databricks/quality.check.sql
@@ -0,0 +1,19 @@
+{% extends "base.jinja" %}
+
+{% block statement %}
+WITH source AS (
+    SELECT
+        *
+    FROM {{ catalog }}.{{ schema }}.{{ table }}
+    {%+ if filter %}WHERE {{ filter }}{% endif +%}
+)
+SELECT
+    *
+    {%+ if row_count %}, (SELECT COUNT(1) FROM source) AS table_records{% endif +%}
+    {%+ if unique -%}
+        {%- for col in unique -%}
+    , (SELECT COUNT {{ col }} FROM (SELECT DISTINCT {{ col}} FROM source)) AS unique_{{ col }}
+        {%- endfor -%}
+    {%- endif +%}
+FROM source
+{% endblock statement %}
diff --git a/tests/test_databricks.py b/tests/test_databricks.py
index d1c1ee1..3593504 100644
--- a/tests/test_databricks.py
+++ b/tests/test_databricks.py
@@ -277,3 +277,19 @@ def test_sql_full_dump(template_path):
         FROM ( SELECT * FROM catalog-name.schema-name.source-name ) AS sub_query
         ;
         """).strip('\n')
+
+
+def test_quality_check(template_path):
+    statement: SQLPlate = (
+        SQLPlate.format('databricks', path=template_path)
+        .template('quality.check')
+        .option('catalog', 'catalog-name')
+        .option('schema', 'schema-name')
+        .option('table', 'table-name')
+        .option('filter', "load_date >= to_timestamp('20250201', 'yyyyMMdd')")
+        .option('unique', ['pk_col'])
+        .option('notnull', ['col01', 'col02'])
+        .option("row_count", True)
+        .load()
+    )
+    print(statement)

From 10dc388e0cfafd068fba31a2b8ed995f35f356a9 Mon Sep 17 00:00:00 2001
From: Korawich Anuttra <korawich.anu@gmail.com>
Date: Wed, 19 Mar 2025 23:47:33 +0700
Subject: [PATCH 4/4] :dart: feat: add template for quality check.

---
 README.md                               | 24 +++++++++++++++++++--
 templates/databricks/quality.check.sql  | 28 ++++++++++++++++++++-----
 templates/databricks/quality.metrix.sql | 11 ++++++++++
 tests/test_databricks.py                | 23 +++++++++++++++++++-
 4 files changed, 78 insertions(+), 8 deletions(-)
 create mode 100644 templates/databricks/quality.metrix.sql

diff --git a/README.md b/README.md
index 3064444..fe6d0a4 100644
--- a/README.md
+++ b/README.md
@@ -112,6 +112,9 @@ WHEN NOT MATCHED THEN INSERT
 
 ### Data Quality
 
+This package only generates SQL statements. For data quality checks, you can
+use the quality template.
+
 > [!IMPORTANT]
 > This feature does not support yet!!!
 
@@ -127,7 +130,8 @@ statement: str = (
     .option('filter', "load_date >= to_timestamp('20250201', 'yyyyMMdd')")
     .option('unique', ['pk_col'])
     .option('notnull', ['col01', 'col02'])
-    .option('row_count', True)
+    .option("contain", [("col01", ["A", "B", "C"])])
+    .option("validate", [("col03", "> 10000")])
     .load()
 )
 print(statement.strip().strip('\n'))
@@ -136,7 +140,23 @@ print(statement.strip().strip('\n'))
 The result SQL statement:
 
 ```sql
-
+WITH source AS (
+    SELECT
+        *
+    FROM
+        catalog-name.schema-name.table-name
+    WHERE load_date >= to_timestamp('20250201', 'yyyyMMdd')
+)
+, records AS (
+    SELECT COUNT(1)     AS table_records
+    FROM source
+)
+SELECT
+    (SELECT table_records FROM records) AS table_records
+    , ((SELECT COUNT( DISTINCT pk_col ) FROM source) = (SELECT table_records FROM records)) AS unique_pk_col
+    , (SELECT COUNT(1) FROM source WHERE pk_col IS NULL) = 0 AS notnull_pk_col
+    , (SELECT COUNT(1) FROM source WHERE col01 NOT IN ['A', 'B', 'C']) = 0 AS contain_col01
+    , ((SELECT COUNT(1) FROM source WHERE col03 > 10000)  = (SELECT table_records FROM records)) AS validate_col03
 ```
 
 ## :chains: Support Systems
diff --git a/templates/databricks/quality.check.sql b/templates/databricks/quality.check.sql
index 1473f24..37de899 100644
--- a/templates/databricks/quality.check.sql
+++ b/templates/databricks/quality.check.sql
@@ -4,16 +4,34 @@
 WITH source AS (
     SELECT
         *
-    FROM {{ catalog }}.{{ schema }}.{{ table }}
+    FROM
+        {{ catalog }}.{{ schema }}.{{ table }}
     {%+ if filter %}WHERE {{ filter }}{% endif +%}
 )
+, records AS (
+    SELECT COUNT(1)     AS table_records
+    FROM source
+)
 SELECT
-    *
-    {%+ if row_count %}, (SELECT COUNT(1) FROM source) AS table_records{% endif +%}
+    (SELECT table_records FROM records) AS table_records
     {%+ if unique -%}
         {%- for col in unique -%}
-    , (SELECT COUNT {{ col }} FROM (SELECT DISTINCT {{ col}} FROM source)) AS unique_{{ col }}
+    , ((SELECT COUNT( DISTINCT {{ col }} ) FROM source) = (SELECT table_records FROM records)) AS unique_{{ col }}
+        {%- endfor -%}
+    {%- endif +%}
+    {%+ if notnull -%}
+        {%- for col in notnull -%}
+    , (SELECT COUNT(1) FROM source WHERE {{ col }} IS NULL) = 0 AS notnull_{{ col }}
+        {%- endfor -%}
+    {%- endif +%}
+    {%+ if contain -%}
+        {%- for col in contain -%}
+    , (SELECT COUNT(1) FROM source WHERE {{ col[0] }} NOT IN {{ col[1] }}) = 0 AS contain_{{ col[0] }}
+        {%- endfor -%}
+    {%- endif +%}
+    {%+ if validate -%}
+        {%- for col in validate -%}
+    , ((SELECT COUNT(1) FROM source WHERE {{ col[0] }} {{ col[1] }})  = (SELECT table_records FROM records)) AS validate_{{ col[0] }}
+        {%- endfor -%}
+    {%- endif +%}
-FROM source
 {% endblock statement %}
diff --git a/templates/databricks/quality.metrix.sql b/templates/databricks/quality.metrix.sql
new file mode 100644
index 0000000..5a21b34
--- /dev/null
+++ b/templates/databricks/quality.metrix.sql
@@ -0,0 +1,11 @@
+{% extends "base.jinja" %}
+
+{% block statement %}
+WITH source AS (
+    SELECT
+        *
+    FROM {{ catalog }}.{{ schema }}.{{ table }}
+    {%+ if filter %}WHERE {{ filter }}{% endif +%}
+)
+SELECT
+{% endblock statement %}
diff --git a/tests/test_databricks.py b/tests/test_databricks.py
index 3593504..41aec71 100644
--- a/tests/test_databricks.py
+++ b/tests/test_databricks.py
@@ -289,7 +289,28 @@ def test_quality_check(template_path):
         .option('filter', "load_date >= to_timestamp('20250201', 'yyyyMMdd')")
         .option('unique', ['pk_col'])
         .option('notnull', ['col01', 'col02'])
-        .option("row_count", True)
+        .option(
+            "contain",
+            [("col01", ["A", "B", "C"])],
+        )
+        .option(
+            "validate",
+            [("col03", "> 10000")],
+        )
+        .load()
+    )
+    print(statement)
+
+
+def test_quality_metrix(template_path):
+    statement: SQLPlate = (
+        SQLPlate.format('databricks', path=template_path)
+        .template('quality.metrix')
+        .option('catalog', 'catalog-name')
+        .option('schema', 'schema-name')
+        .option('table', 'table-name')
+        .option('filter', "load_date >= to_timestamp('20250201', 'yyyyMMdd')")
+        .option("metrix", ["col1", "col2", "col3"])
         .load()
     )
     print(statement)