awslabs · iansrobinson · Mar 4, 2026 · Mar 3, 2026 · Mar 3, 2026
diff --git a/lexical-graph/tests/conftest.py → ...raphrag_toolkit/lexical_graph/conftest.py b/lexical-graph/tests/conftest.py → ...raphrag_toolkit/lexical_graph/conftest.py
diff --git a/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/__init__.py b/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/test_id_generator.py b/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/test_id_generator.py
diff --git a/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py b/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/.../unit/utils/test_batch_inference_utils.py → ...exing/utils/test_batch_inference_utils.py b/.../unit/utils/test_batch_inference_utils.py → ...exing/utils/test_batch_inference_utils.py
diff --git a/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_fact_utils.py b/lexical-graph/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_fact_utils.py
@@ -0,0 +1,52 @@
+"""Tests for fact_utils.py — fact manipulation utilities."""
+
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+import pytest
+
+from graphrag_toolkit.lexical_graph.indexing.utils.fact_utils import (
+    string_complement_to_entity,
+)
+from graphrag_toolkit.lexical_graph.indexing.model import Entity, Fact, Relation
+from graphrag_toolkit.lexical_graph.indexing.constants import LOCAL_ENTITY_CLASSIFICATION
+
+
+# ---------------------------------------------------------------------------
+# string_complement_to_entity
+# ---------------------------------------------------------------------------
+
+class TestStringComplementToEntity:  # string_complement_to_entity with str, Entity and None complements
+
+    def test_converts_string_complement_to_entity(self):  # a str complement should come back as an Entity
+        fact = Fact(
+            subject=Entity(value="A", classification="X"),
+            predicate=Relation(value="relates"),
+            complement="some string",
+        )
+        result = string_complement_to_entity(fact)
+
+        assert isinstance(result.complement, Entity)
+        assert result.complement.value == "some string"  # the original text is kept as the entity value
+        assert result.complement.classification == LOCAL_ENTITY_CLASSIFICATION
+
+    def test_preserves_existing_entity_complement(self):  # an Entity complement should be left alone
+        entity = Entity(value="B", classification="Y")
+        fact = Fact(
+            subject=Entity(value="A", classification="X"),
+            predicate=Relation(value="relates"),
+            complement=entity,
+        )
+        result = string_complement_to_entity(fact)
+
+        assert result.complement is entity  # identity check: the very same object is expected, not a copy
+
+    def test_none_complement_unchanged(self):  # a missing complement should stay missing
+        fact = Fact(
+            subject=Entity(value="A", classification="X"),
+            predicate=Relation(value="relates"),
+            complement=None,
+        )
+        result = string_complement_to_entity(fact)
+
+        assert result.complement is None
diff --git a/...graph/tests/unit/utils/test_hash_utils.py → ...l_graph/indexing/utils/test_hash_utils.py b/...graph/tests/unit/utils/test_hash_utils.py → ...l_graph/indexing/utils/test_hash_utils.py
diff --git a/...h/tests/unit/utils/test_metadata_utils.py → ...aph/indexing/utils/test_metadata_utils.py b/...h/tests/unit/utils/test_metadata_utils.py → ...aph/indexing/utils/test_metadata_utils.py
diff --git a/...raph/tests/unit/utils/test_topic_utils.py → ..._graph/indexing/utils/test_topic_utils.py b/...raph/tests/unit/utils/test_topic_utils.py → ..._graph/indexing/utils/test_topic_utils.py
@@ -2,7 +2,11 @@
 
 String normalisation pipeline
 ------------------------------
-  format_value             : replaces underscores with spaces (tested in test_format_helpers.py)
+  format_value             : replaces underscores with spaces
+  format_text              : returns a string unchanged; joins a list of strings with newlines
+  format_list              : joins list items with newlines
+  format_classification    : formats classification strings (underscore to space, title case)
+  strip_full_stop          : removes one trailing period from a string ('hello...' -> 'hello..')
   remove_parenthetical_content : strips everything between the first '(' and last ')' on a line
   remove_articles          : strips leading English articles ('a ', 'an ', 'the ') case-insensitively
   clean                    : composed pipeline — format_value -> remove_parenthetical_content -> remove_articles
@@ -37,9 +41,17 @@
 - Lines outside any recognised state -> appended to garbage.
 """
 
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
+
 import pytest
 
 from graphrag_toolkit.lexical_graph.indexing.utils.topic_utils import (
+    format_text,
+    format_list,
+    format_value,
+    format_classification,
+    strip_full_stop,
     remove_parenthetical_content,
     remove_articles,
     clean,
@@ -51,6 +63,89 @@
 )
 
 
+# ---------------------------------------------------------------------------
+# format_text
+# ---------------------------------------------------------------------------
+
+class TestFormatText:  # format_text on a plain string and on lists of 0, 1 and 2 items
+
+    @pytest.mark.parametrize("input_val,expected", [
+        ("hello", "hello"),  # a string is expected back as-is
+        (["a", "b"], "a\nb"),  # list items are expected joined by a newline
+        (["single"], "single"),  # one item: no separator
+        ([], ""),  # empty list is expected to give an empty string
+    ])
+    def test_format_text(self, input_val, expected):  # one test run per (input_val, expected) pair above
+        assert format_text(input_val) == expected
+
+
+# ---------------------------------------------------------------------------
+# format_list
+# ---------------------------------------------------------------------------
+
+class TestFormatList:  # format_list on lists of 0, 1 and 3 items
+
+    @pytest.mark.parametrize("input_val,expected", [
+        (["a", "b", "c"], "a\nb\nc"),  # items are expected joined by newlines, no trailing newline
+        (["only"], "only"),  # one item: no separator
+        ([], ""),  # empty list is expected to give an empty string
+    ])
+    def test_format_list(self, input_val, expected):  # one test run per (input_val, expected) pair above
+        assert format_list(input_val) == expected
+
+
+# ---------------------------------------------------------------------------
+# format_value
+# ---------------------------------------------------------------------------
+
+class TestFormatValue:  # format_value: underscore replacement plus empty and None inputs
+
+    @pytest.mark.parametrize("input_val,expected", [
+        ("hello_world", "hello world"),  # underscore is expected to become a space
+        ("nounderscore", "nounderscore"),  # nothing to replace
+        ("", ""),
+        (None, ""),  # None is expected to yield an empty string, not None
+        ("__double__", "  double  "),  # each underscore maps to its own space; runs are not collapsed
+    ])
+    def test_format_value(self, input_val, expected):  # one test run per (input_val, expected) pair above
+        assert format_value(input_val) == expected
+
+
+# ---------------------------------------------------------------------------
+# format_classification
+# ---------------------------------------------------------------------------
+
+class TestFormatClassification:  # format_classification: underscores, casing, empty and None inputs
+
+    @pytest.mark.parametrize("input_val,expected", [
+        ("natural_language", "Natural Language"),  # underscore -> space, each word capitalised
+        ("UPPER", "Upper"),  # existing upper-case letters are expected to be lowered after the first
+        ("", ""),
+        (None, ""),  # None is expected to yield an empty string, not None
+        ("single", "Single"),
+    ])
+    def test_format_classification(self, input_val, expected):  # one test run per pair above
+        assert format_classification(input_val) == expected
+
+
+# ---------------------------------------------------------------------------
+# strip_full_stop
+# ---------------------------------------------------------------------------
+
+class TestStripFullStop:  # strip_full_stop: trailing periods plus empty and None inputs
+
+    @pytest.mark.parametrize("input_val,expected", [
+        ("hello.", "hello"),  # trailing period is expected to be dropped
+        ("hello", "hello"),  # nothing to strip
+        (".", ""),  # a lone period is expected to leave an empty string
+        ("", ""),
+        (None, None),  # None is expected back unchanged (not converted to "")
+        ("hello...", "hello.."),  # only one trailing period is expected to be removed
+    ])
+    def test_strip_full_stop(self, input_val, expected):  # one test run per (input_val, expected) pair above
+        assert strip_full_stop(input_val) == expected
+
+
 # ---------------------------------------------------------------------------
 # remove_parenthetical_content
 # ---------------------------------------------------------------------------

diff --git a/lexical-graph/tests/unit/test_tenant_id.py → ...g_toolkit/lexical_graph/test_tenant_id.py b/lexical-graph/tests/unit/test_tenant_id.py → ...g_toolkit/lexical_graph/test_tenant_id.py
diff --git a/lexical-graph/tests/graphrag_toolkit/lexical_graph/utils/__init__.py b/lexical-graph/tests/graphrag_toolkit/lexical_graph/utils/__init__.py
@@ -0,0 +1,2 @@
+# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
+# SPDX-License-Identifier: Apache-2.0
diff --git a/...-graph/tests/unit/utils/test_arg_utils.py → ...kit/lexical_graph/utils/test_arg_utils.py b/...-graph/tests/unit/utils/test_arg_utils.py → ...kit/lexical_graph/utils/test_arg_utils.py
diff --git a/...l-graph/tests/unit/utils/test_io_utils.py → ...lkit/lexical_graph/utils/test_io_utils.py b/...l-graph/tests/unit/utils/test_io_utils.py → ...lkit/lexical_graph/utils/test_io_utils.py
diff --git a/...h/tests/unit/utils/test_reranker_utils.py → ...exical_graph/utils/test_reranker_utils.py b/...h/tests/unit/utils/test_reranker_utils.py → ...exical_graph/utils/test_reranker_utils.py
diff --git a/lexical-graph/tests/unit/__init__.py b/lexical-graph/tests/unit/__init__.py
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,2 @@
		# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
		# SPDX-License-Identifier: Apache-2.0