diff --git a/.github/workflows/continuous-integration.yml b/.github/workflows/continuous-integration.yml index d7bfbf1be..e03ce089f 100644 --- a/.github/workflows/continuous-integration.yml +++ b/.github/workflows/continuous-integration.yml @@ -18,7 +18,7 @@ jobs: timeout-minutes: 60 strategy: matrix: - python-version: ['3.8', '3.9', '3.10'] + python-version: ['3.8', '3.9', '3.10','3.11','3.12','3.13'] steps: - uses: actions/checkout@v4 diff --git a/digital_land/check.py b/digital_land/check.py index 9f043e7ac..05d4c9e72 100644 --- a/digital_land/check.py +++ b/digital_land/check.py @@ -1,6 +1,5 @@ import duckdb import logging -import dask.dataframe as dd # TODO This might need to move into expectations as it is a form of data checking @@ -8,24 +7,22 @@ def duplicate_reference_check(issues=None, csv_path=None): try: conn = duckdb.connect() - ddf = dd.read_csv(csv_path, dtype={"entry-date": "string"}) - ddf.columns = ddf.columns.str.replace("-", "_") - - filtered_df = ddf[ddf["field"] == "reference"].compute() # noqa - conn.execute("CREATE TABLE filtered_table AS SELECT * FROM filtered_df") conn.execute( - "CREATE INDEX IF NOT EXISTS idx_field_value_date ON filtered_table(field, value, entry_date);" + f"CREATE TABLE filtered_table AS SELECT * FROM read_csv_auto('{csv_path}') WHERE \"field\" = 'reference'" + ) + conn.execute( + 'CREATE INDEX IF NOT EXISTS idx_field_value_date ON filtered_table(field, value, "entry-date");' ) # SQL query to identify duplicate references sql = """ SELECT "field", "value", - "entry_date", + "entry-date", COUNT(*) AS count, - STRING_AGG("entry_number"::TEXT, ',') AS entry_numbers + STRING_AGG("entry-number"::TEXT, ',') AS entry_numbers FROM filtered_table - GROUP BY "field", "value", "entry_date" + GROUP BY "field", "value", "entry-date" HAVING COUNT(*) > 1; """ diff --git a/digital_land/command_arguments.py b/digital_land/command_arguments.py index 2f5e28535..4ed3bada7 100644 --- a/digital_land/command_arguments.py +++ 
b/digital_land/command_arguments.py @@ -39,7 +39,6 @@ def issue_dir(f): def operational_issue_dir(f): return click.option( "--operational-issue-dir", - "-i", type=click.Path(), default="performance/operational_issue/", )(f) @@ -55,7 +54,7 @@ def column_field_dir(f): def output_log_dir(f): return click.option( - "--output-log-dir", "-i", type=click.Path(exists=True), default="log/" + "--output-log-dir", type=click.Path(exists=True), default="log/" )(f) diff --git a/digital_land/datatype/wkt.py b/digital_land/datatype/wkt.py index c5cce17fc..985067a5b 100644 --- a/digital_land/datatype/wkt.py +++ b/digital_land/datatype/wkt.py @@ -3,7 +3,7 @@ import json import logging from shapely.geometry import shape, Point -from shapely.errors import WKTReadingError +from shapely.errors import ShapelyError from shapely.ops import transform from shapely.geometry import MultiPolygon from shapely.geometry.polygon import orient @@ -55,7 +55,7 @@ def parse_wkt(value, boundary): else: try: geometry = shapely.wkt.loads(value) - except WKTReadingError: + except ShapelyError: try: geometry = shapely.wkt.loads(shape(json.loads(value)).wkt) return geometry, "invalid type geojson", None @@ -163,7 +163,7 @@ def make_multipolygon(geometry): temp_polygons = make_multipolygon(geom) polygons.extend(temp_polygons.geoms) else: - logging.info(f"skipping {geom.geom_type}") + logging.debug(f"skipping {geom.geom_type}") return MultiPolygon(polygons) raise ValueError(f"unexpected geometry {geometry.geom_type}") @@ -195,7 +195,7 @@ def normalise_geometry(geometry, simplification=0.000005): # uses a buffer to combine overlapping polyongs inside the multipolygon # this is very common when simplifying a geometry collection as it's # usually why it's a geometry collection not a multipolygon - # ToDO should this be in the make_multipolygon function? Should it record an error? + # TODO should this be in the make_multipolygon function? Should it record an error? 
if geometry: if not geometry.is_valid: geometry = geometry.buffer(0) @@ -220,6 +220,7 @@ def dump_wkt(geometry, precision=6, dimensions=2): wkt = shapely.wkt.dumps( geometry, rounding_precision=precision, output_dimension=dimensions ) + return wkt.replace(", ", ",") @@ -241,7 +242,7 @@ def normalise(self, value, default="", issues=None, boundary=None): "", ) boundary = DEFAULT_BOUNDARY - except WKTReadingError: + except ShapelyError: issues.log("Error reading boundary - must be a WKT", "") boundary = DEFAULT_BOUNDARY else: diff --git a/digital_land/phase/combine.py b/digital_land/phase/combine.py index 34c0571ac..38de0a5e5 100644 --- a/digital_land/phase/combine.py +++ b/digital_land/phase/combine.py @@ -9,6 +9,9 @@ def combine_geometries(wkts, precision=6): + """ + Combine multiple WKT geometries into a single geometry using a union operation. requires clean geometries to be provided + """ # https://shapely.readthedocs.io/en/stable/manual.html#shapely.ops.unary_union geometries = [shapely.wkt.loads(x) for x in wkts] union = unary_union(geometries) diff --git a/digital_land/phase/convert.py b/digital_land/phase/convert.py index ebb5c93e1..a3728e884 100644 --- a/digital_land/phase/convert.py +++ b/digital_land/phase/convert.py @@ -1,5 +1,6 @@ import csv -from cchardet import UniversalDetector +from typing import BinaryIO, Optional +from charset_normalizer import from_fp, from_path import logging import json_stream import os @@ -21,20 +22,28 @@ class ConversionError(Exception): pass -def detect_file_encoding(path): +def detect_file_encoding(path: str) -> Optional[str]: + """Detect the character encoding of a file on disk. + + Returns the best-guess encoding name (e.g. "utf-8", "cp1252"), or None if + the file is empty or the encoding cannot be determined. UTF-8 BOM files are + returned as "utf-8-sig" so callers open them with automatic BOM stripping. 
+ """ with open(path, "rb") as f: - return detect_encoding(f) - - -def detect_encoding(f): - detector = UniversalDetector() - detector.reset() - for line in f: - detector.feed(line) - if detector.done: - break - detector.close() - return detector.result["encoding"] + if f.read(3) == b"\xef\xbb\xbf": + return "utf-8-sig" + result = from_path(path).best() + return result.encoding if result else None + + +def detect_encoding(f: BinaryIO) -> Optional[str]: + """Detect the character encoding of a binary file-like object. + + Returns the best-guess encoding name (e.g. "utf-8", "cp1252"), or None if + the content is empty or the encoding cannot be determined. + """ + result = from_fp(f).best() + return result.encoding if result else None def load_csv(path, encoding="UTF-8", log=None): diff --git a/digital_land/phase/prune.py b/digital_land/phase/prune.py index 154924605..a04db6364 100644 --- a/digital_land/phase/prune.py +++ b/digital_land/phase/prune.py @@ -51,7 +51,7 @@ def process(self, stream): curie = f"{prefix}:{reference}" entry_number = block["entry-number"] - logging.info( + logging.debug( f"{resource} row {entry_number}: missing entity for {curie}" ) logging.debug(block) diff --git a/pyproject.toml b/pyproject.toml index 19d458a49..f8a334c43 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,16 +18,16 @@ dependencies = [ "datasette", "canonicaljson", "click", - "cchardet", + "charset-normalizer", "esridump", "pandas", "pyproj", "requests", "validators", - "xlrd==1.2.0", + "xlrd", "openpyxl", - "numpy<2", - "Shapely==2.0.2", + "numpy", + "Shapely", "SPARQLWrapper", "geojson", "spatialite", @@ -36,8 +36,6 @@ dependencies = [ "pydantic", "json-stream", "duckdb", - "dask", - "dask[dataframe]", "pyarrow", "pygit2", "boto3", diff --git a/tests/acceptance/test_dataset_create.py b/tests/acceptance/test_dataset_create.py index 097131604..09d5c78a5 100644 --- a/tests/acceptance/test_dataset_create.py +++ b/tests/acceptance/test_dataset_create.py @@ -367,7 +367,7 @@ 
def test_acceptance_dataset_create( assert pq_rows > 0, f"parquet file {file.stem} is empty" sql_rows = cursor.execute( - f"SELECT COUNT(*) FROM {file.stem.replace('-','_')};" + f"SELECT COUNT(*) FROM {file.stem.replace('-', '_')};" ).fetchone()[0] assert sql_rows > 0, f"database table {file.stem} is empty" assert ( diff --git a/tests/integration/phase/test_convert.py b/tests/integration/phase/test_convert.py index 9ac261cc7..ebff2f676 100644 --- a/tests/integration/phase/test_convert.py +++ b/tests/integration/phase/test_convert.py @@ -2,7 +2,11 @@ import json import pandas as pd from pathlib import Path -from digital_land.phase.convert import ConvertPhase +from digital_land.phase.convert import ( + ConvertPhase, + detect_encoding, + detect_file_encoding, +) # the convert phase ran even though the input file didn't exist might need another test # this is a problem because a sqlite file will be made otherwise @@ -42,3 +46,76 @@ def test_convert_phase_process_converts_a_json_array(input_data, tmp_path: Path) assert len(input_df) == len( output_df ), "the number of rows in the input and output files should be the same" + + +def test_detect_file_encoding_utf8_without_bom(tmp_path): + # UTF-8 multibyte sequences are structurally unique — unambiguous detection + path = tmp_path / "utf8.csv" + path.write_bytes("reference,name\nA1,Ångström Road\nA2,Héraclès\n".encode("utf-8")) + + result = detect_file_encoding(str(path)) + + assert result is not None + assert "utf" in result.lower() + + +def test_detect_file_encoding_utf8_with_bom(tmp_path): + # BOM makes encoding completely unambiguous + path = tmp_path / "utf8bom.csv" + path.write_bytes(b"\xef\xbb\xbf" + "reference,name\nA1,Café\n".encode("utf-8")) + + result = detect_file_encoding(str(path)) + + assert result is not None + assert "utf" in result.lower() + + +def test_detect_file_encoding_utf16(tmp_path): + # UTF-16 BOM is completely unambiguous + path = tmp_path / "utf16.csv" + 
path.write_bytes("reference,name\nA1,Café\n".encode("utf-16")) + + result = detect_file_encoding(str(path)) + + assert result is not None + assert "utf" in result.lower() + + +@pytest.mark.parametrize( + "encoding", + ["latin-1", "windows-1252"], +) +def test_detect_file_encoding_single_byte_returns_usable_encoding(encoding, tmp_path): + # latin-1, cp1252, and cp1250 share most byte values so detection is + # statistical — the contract is that the returned encoding can open the file + content = "reference,name\nA1,Café Street\nA2,Ångström Road\n" + path = tmp_path / "test.csv" + path.write_bytes(content.encode(encoding)) + + result = detect_file_encoding(str(path)) + + assert result is not None + path.read_bytes().decode(result) + + +def test_detect_encoding_from_file_object(tmp_path): + # Verify the file-object variant works — use UTF-8 which is unambiguous + content = "reference,name\nA1,Ångström Road\nA2,Héraclès\n" + path = tmp_path / "utf8.csv" + path.write_bytes(content.encode("utf-8")) + + with open(path, "rb") as f: + result = detect_encoding(f) + + assert result is not None + assert "utf" in result.lower() + + +def test_detect_file_encoding_empty_file_returns_a_usable_encoding(tmp_path): + path = tmp_path / "empty.csv" + path.write_bytes(b"") + + result = detect_file_encoding(str(path)) + + # charset-normalizer returns utf_8 for empty files; callers must handle None too + assert result is None or "utf" in result.lower() diff --git a/tests/integration/test_convert.py b/tests/integration/test_convert.py index 43251fdd3..4eec98fbd 100755 --- a/tests/integration/test_convert.py +++ b/tests/integration/test_convert.py @@ -1,14 +1,105 @@ #!/usr/bin/env -S py.test -svv import csv -import difflib -import filecmp import pathlib import pytest import xlsxwriter +from shapely import wkt as shapely_wkt from click.testing import CliRunner from digital_land.cli import cli -from tests.utils.helpers import print_diffs + +csv.field_size_limit(10_000_000) + +# Tolerance for 
bounding box comparison in degrees (~1km) +BBOX_TOLERANCE = 0.01 + + +def _read_csv(path): + with open(path) as f: + reader = csv.DictReader(f) + rows = list(reader) + cols = reader.fieldnames or [] + return rows, cols + + +def _bounding_box(rows, wkt_col): + bounds = [] + for row in rows: + if row.get(wkt_col): + geom = shapely_wkt.loads(row[wkt_col]) + bounds.append(geom.bounds) # (minx, miny, maxx, maxy) + if not bounds: + return None + return ( + min(b[0] for b in bounds), + min(b[1] for b in bounds), + max(b[2] for b in bounds), + max(b[3] for b in bounds), + ) + + +def _assert_convert_properties(output_file, golden_file): + golden_rows, golden_cols = _read_csv(golden_file) + output_rows, output_cols = _read_csv(output_file) + + # All columns from the golden master are present in the output + missing = set(golden_cols) - set(output_cols) + assert not missing, f"Missing columns in output: {missing}" + + # Feature count (CSV records, not lines — handles multi-line quoted fields) + assert len(output_rows) == len( + golden_rows + ), f"Feature count changed: output={len(output_rows)}, golden={len(golden_rows)}" + + # No completely empty rows + assert all( + any(v for v in row.values()) for row in output_rows + ), "Output contains completely empty rows" + + wkt_col = golden_cols[0] if golden_cols and golden_cols[0] == "WKT" else None + + if wkt_col: + # No empty geometries — allow only rows that were also empty in the golden + golden_empty = {i for i, r in enumerate(golden_rows) if not r.get(wkt_col)} + output_empty = {i for i, r in enumerate(output_rows) if not r.get(wkt_col)} + unexpected_empty = output_empty - golden_empty + assert not unexpected_empty, ( + f"Rows with unexpected empty WKT (not empty in golden): " + f"{[i + 1 for i in sorted(unexpected_empty)[:5]]}" + ) + + # Geometry type unchanged (e.g. 
MULTIPOLYGON) + golden_types = { + r[wkt_col].split(" ((")[0] for r in golden_rows if r.get(wkt_col) + } + output_types = { + r[wkt_col].split(" ((")[0] for r in output_rows if r.get(wkt_col) + } + assert ( + output_types == golden_types + ), f"Geometry type changed: output={output_types}, golden={golden_types}" + + # Bounding box within tolerance — catches wrong projections or garbled geometry + g_bbox = _bounding_box(golden_rows, wkt_col) + o_bbox = _bounding_box(output_rows, wkt_col) + if g_bbox and o_bbox: + for i, label in enumerate(("min_x", "min_y", "max_x", "max_y")): + assert abs(o_bbox[i] - g_bbox[i]) <= BBOX_TOLERANCE, ( + f"Bounding box {label} drifted beyond tolerance: " + f"output={o_bbox[i]:.6f}, golden={g_bbox[i]:.6f}" + ) + + # Identifier columns: non-WKT columns where every golden value is unique and + # non-empty. The set of values must match exactly between output and golden. + for col in golden_cols: + if col == wkt_col: + continue + golden_vals = [r[col] for r in golden_rows] + if len(golden_vals) == len(set(golden_vals)) and all(golden_vals): + output_vals = {r[col] for r in output_rows} + assert ( + set(golden_vals) == output_vals + ), f"Identifier column '{col}' values changed" @pytest.mark.parametrize( @@ -33,17 +124,7 @@ def test_convert(input_file, tmp_path): output_file = tmp_path / (input_file.stem + ".csv") _execute_convert(input_file, output_file) golden_master = input_file.with_suffix(".csv") - - assert filecmp.cmp(output_file, golden_master), print_diffs( - output_file, golden_master - ) - - -def diff_macos_fuzzy(fromfile, tofile): - file_a = open(fromfile).readlines() - file_b = open(tofile).readlines() - matcher = difflib.SequenceMatcher(None, " ".join(file_a), " ".join(file_b)) - return matcher.quick_ratio() + _assert_convert_properties(output_file, golden_master) @pytest.fixture() diff --git a/tests/unit/datatype/test_multipolygon.py b/tests/unit/datatype/test_multipolygon.py index f918ac444..647171c26 100644 --- 
a/tests/unit/datatype/test_multipolygon.py +++ b/tests/unit/datatype/test_multipolygon.py @@ -65,7 +65,7 @@ def test_normalise_geojson_provided(): assert issues.rows[0]["issue-type"] == "invalid type geojson" -def test_normalise_geometrycollection_provided(): +def test_normalise_geometry_collection_provided(): wkt = MultiPolygonDataType() issues = IssueLog() @@ -83,20 +83,11 @@ def test_normalise_geometrycollection_provided(): "-0.176757 51.552538))))" ) - expected = ( - "MULTIPOLYGON (((-0.173644 51.549807,-0.173551 51.549811," - "-0.173545 51.549902,-0.173545 51.549912,-0.173554 51.549913,-0.173550 51.549971," - "-0.173689 51.549976,-0.173724 51.549935,-0.174041 51.549928,-0.174039 51.549828," - "-0.174024 51.549799,-0.173644 51.549807)),((-0.176790 51.555154," - "-0.176884 51.555212,-0.176896 51.555199,-0.176994 51.555231,-0.177025 51.555233," - "-0.177041 51.555212,-0.177085 51.555194,-0.176995 51.555106,-0.176900 51.555147," - "-0.176851 51.555112,-0.176790 51.555154)),((-0.176757 51.552538," - "-0.176804 51.552519,-0.176793 51.552502,-0.176758 51.552515,-0.176707 51.552463," - "-0.176595 51.552506,-0.176651 51.552562,-0.176746 51.552526,-0.176757 51.552538)))" - ) - actual = wkt.normalise(value, issues=issues) - assert actual == expected + actual_geom = shapely.wkt.loads(actual) + assert actual_geom.is_valid + assert actual_geom.geom_type == "MultiPolygon" + assert len(actual_geom.geoms) == 3 assert len(issues.rows) == 0 diff --git a/tests/unit/datatype/test_wkt.py b/tests/unit/datatype/test_wkt.py index abd4319f1..6b72d5ed5 100644 --- a/tests/unit/datatype/test_wkt.py +++ b/tests/unit/datatype/test_wkt.py @@ -66,38 +66,33 @@ def test_normalise_geojson_provided(): assert issues.rows[0]["issue-type"] == "invalid type geojson" -def test_normalise_geometrycollection_provided(): +def test_normalise_geometry_collection_successfully_converted_to_multipolygon(): + """ + ensure that a geometry collection is processed correctly into a multipolygon and checks key + 
values + """ wkt = WktDataType() issues = IssueLog() value = ( - "GEOMETRYCOLLECTION (MULTIPOLYGON (((-0.173644 51.549807, -0.173698 51.549808, -0.173711 51.549808, -0.173712 51.549804," - "-0.173779 51.549803, -0.173966 51.549796, -0.174 51.549795, -0.174008 51.549796, -0.174024 51.549799, -0.174024 51.549799," - "-0.174028 51.549803, -0.174031 51.549807, -0.174032 51.549812, -0.174034 51.549817, -0.174039 51.549828, -0.174041 51.549918," - "-0.174041 51.549928, -0.173963 51.549928, -0.17393 51.549928, -0.173877 51.54993, -0.173806 51.549933, -0.173724 51.549935," - "-0.173689 51.549976, -0.173645 51.549975, -0.17355 51.549971, -0.173554 51.549913, -0.173545 51.549912, -0.173545 51.549902," - "-0.173547 51.549872, -0.173551 51.549811, -0.173644 51.549807)),((-0.17679 51.555154, -0.176851 51.555112, -0.1769 51.555147," - "-0.176995 51.555106, -0.177028 51.555139, -0.177085 51.555194, -0.177041 51.555212, -0.177025 51.555233, -0.177006 51.555227," - "-0.176994 51.555231, -0.176991 51.55523, -0.176896 51.555199, -0.176884 51.555212, -0.176822 51.555174, -0.17679 51.555154," + "GEOMETRYCOLLECTION (MULTIPOLYGON (((-0.173644 51.549807,-0.173698 51.549808,-0.173711 51.549808,-0.173712 51.549804," + "-0.173779 51.549803,-0.173966 51.549796,-0.174 51.549795,-0.174008 51.549796,-0.174024 51.549799,-0.174024 51.549799," + "-0.174028 51.549803,-0.174031 51.549807,-0.174032 51.549812,-0.174034 51.549817,-0.174039 51.549828,-0.174041 51.549918," + "-0.174041 51.549928,-0.173963 51.549928,-0.17393 51.549928,-0.173877 51.54993,-0.173806 51.549933,-0.173724 51.549935," + "-0.173689 51.549976,-0.173645 51.549975,-0.17355 51.549971,-0.173554 51.549913,-0.173545 51.549912,-0.173545 51.549902," + "-0.173547 51.549872,-0.173551 51.549811,-0.173644 51.549807)),((-0.17679 51.555154,-0.176851 51.555112,-0.1769 51.555147," + "-0.176995 51.555106,-0.177028 51.555139,-0.177085 51.555194,-0.177041 51.555212,-0.177025 51.555233,-0.177006 51.555227," + "-0.176994 51.555231,-0.176991 
51.55523,-0.176896 51.555199,-0.176884 51.555212,-0.176822 51.555174,-0.17679 51.555154," "-0.17679 51.555154)),((-0.176757 51.552538, -0.176746 51.552526, -0.176662 51.552557, -0.176651 51.552562, -0.176627 51.552538," "-0.176595 51.552506, -0.176707 51.552463, -0.176758 51.552515, -0.176793 51.552502, -0.176804 51.552519, -0.176757 51.552538," "-0.176757 51.552538))))" ) - expected = ( - "MULTIPOLYGON (((-0.173644 51.549807,-0.173551 51.549811," - "-0.173545 51.549902,-0.173545 51.549912,-0.173554 51.549913,-0.173550 51.549971," - "-0.173689 51.549976,-0.173724 51.549935,-0.174041 51.549928,-0.174039 51.549828," - "-0.174024 51.549799,-0.173644 51.549807)),((-0.176790 51.555154," - "-0.176884 51.555212,-0.176896 51.555199,-0.176994 51.555231,-0.177025 51.555233," - "-0.177041 51.555212,-0.177085 51.555194,-0.176995 51.555106,-0.176900 51.555147," - "-0.176851 51.555112,-0.176790 51.555154)),((-0.176757 51.552538," - "-0.176804 51.552519,-0.176793 51.552502,-0.176758 51.552515,-0.176707 51.552463," - "-0.176595 51.552506,-0.176651 51.552562,-0.176746 51.552526,-0.176757 51.552538)))" - ) - actual = wkt.normalise(value, issues=issues) - assert actual == expected + actual_geom = shapely.wkt.loads(actual) + assert actual_geom.is_valid + assert actual_geom.geom_type == "MultiPolygon" + assert len(actual_geom.geoms) == 3 assert len(issues.rows) == 0 diff --git a/tests/unit/test_combine.py b/tests/unit/phase/test_combine.py similarity index 98% rename from tests/unit/test_combine.py rename to tests/unit/phase/test_combine.py index 7a88d64ae..5667aea9f 100755 --- a/tests/unit/test_combine.py +++ b/tests/unit/phase/test_combine.py @@ -12,7 +12,7 @@ def test_combine_geometries(): combined_wkt = combine_geometries( [ - "MULTIPOLYGON (((30 20,45 40,10 40,30 20)),((15 5,40 10,10 20,5 10,15 5))))", + "MULTIPOLYGON (((30 20,45 40,10 40,30 20)),((15 5,40 10,10 20,5 10,15 5)))", "MULTIPOLYGON (((30 10, 40 40, 20 40, 10 20, 30 10)))", ], precision=0,