Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
"""Tests for fact_utils.py — fact manipulation utilities."""

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from graphrag_toolkit.lexical_graph.indexing.utils.fact_utils import (
string_complement_to_entity,
)
from graphrag_toolkit.lexical_graph.indexing.model import Entity, Fact, Relation
from graphrag_toolkit.lexical_graph.indexing.constants import LOCAL_ENTITY_CLASSIFICATION


# ---------------------------------------------------------------------------
# string_complement_to_entity
# ---------------------------------------------------------------------------

class TestStringComplementToEntity:
    """Checks on how string_complement_to_entity treats each kind of complement."""

    @staticmethod
    def _fact_with(complement):
        """Build a Fact with a fixed subject and predicate around *complement*.

        The name does not start with ``test``, so pytest does not collect it.
        """
        return Fact(
            subject=Entity(value="A", classification="X"),
            predicate=Relation(value="relates"),
            complement=complement,
        )

    def test_converts_string_complement_to_entity(self):
        """A plain-string complement is expected back as a locally classified Entity."""
        converted = string_complement_to_entity(self._fact_with("some string"))
        complement = converted.complement

        assert isinstance(complement, Entity)
        assert complement.value == "some string"
        assert complement.classification == LOCAL_ENTITY_CLASSIFICATION

    def test_preserves_existing_entity_complement(self):
        """An Entity complement is expected back as the very same object."""
        entity = Entity(value="B", classification="Y")
        converted = string_complement_to_entity(self._fact_with(entity))

        # Identity, not equality: the entity must not be copied or rebuilt.
        assert converted.complement is entity

    def test_none_complement_unchanged(self):
        """A missing complement is expected to stay None."""
        converted = string_complement_to_entity(self._fact_with(None))

        assert converted.complement is None
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,11 @@

String normalisation pipeline
------------------------------
format_value : replaces underscores with spaces
format_text : converts string or list to newline-separated string
format_list : joins list items with newlines
format_classification : formats classification strings (underscore to space, title case)
strip_full_stop : removes a single trailing period from strings
remove_parenthetical_content : strips everything between the first '(' and last ')' on a line
remove_articles : strips leading English articles ('a ', 'an ', 'the ') case-insensitively
clean : composed pipeline — format_value -> remove_parenthetical_content -> remove_articles
Expand Down Expand Up @@ -37,9 +41,17 @@
- Lines outside any recognised state -> appended to garbage.
"""

# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0

import pytest

from graphrag_toolkit.lexical_graph.indexing.utils.topic_utils import (
format_text,
format_list,
format_value,
format_classification,
strip_full_stop,
remove_parenthetical_content,
remove_articles,
clean,
Expand All @@ -51,6 +63,89 @@
)


# ---------------------------------------------------------------------------
# format_text
# ---------------------------------------------------------------------------

class TestFormatText:
    """Table-driven expectations for format_text on string and list inputs."""

    @pytest.mark.parametrize(
        "input_val,expected",
        [
            ("hello", "hello"),      # a bare string is expected back as-is
            (["a", "b"], "a\nb"),    # list items are joined with a newline
            (["single"], "single"),  # one item: no separator
            ([], ""),                # empty list: empty string
        ],
    )
    def test_format_text(self, input_val, expected):
        """format_text(input_val) must equal the tabulated expectation."""
        actual = format_text(input_val)
        assert actual == expected


# ---------------------------------------------------------------------------
# format_list
# ---------------------------------------------------------------------------

class TestFormatList:
    """Table-driven expectations for format_list on lists of varying length."""

    @pytest.mark.parametrize(
        "input_val,expected",
        [
            (["a", "b", "c"], "a\nb\nc"),  # items joined with newlines
            (["only"], "only"),            # one item: no separator
            ([], ""),                      # empty list: empty string
        ],
    )
    def test_format_list(self, input_val, expected):
        """format_list(input_val) must equal the tabulated expectation."""
        actual = format_list(input_val)
        assert actual == expected


# ---------------------------------------------------------------------------
# format_value
# ---------------------------------------------------------------------------

class TestFormatValue:
    """Table-driven expectations for format_value (underscores become spaces)."""

    @pytest.mark.parametrize(
        "input_val,expected",
        [
            ("hello_world", "hello world"),    # one underscore -> one space
            ("nounderscore", "nounderscore"),  # nothing to replace
            ("", ""),                          # empty string stays empty
            (None, ""),                        # None is expected to yield ""
            # NOTE(review): replacement is documented as one space per
            # underscore, so two underscores on each side give two spaces on
            # each side. A single-space expectation would only hold if
            # format_value also collapsed whitespace; confirm against the
            # implementation.
            ("__double__", "  double  "),
        ],
    )
    def test_format_value(self, input_val, expected):
        """format_value(input_val) must equal the tabulated expectation."""
        actual = format_value(input_val)
        assert actual == expected


# ---------------------------------------------------------------------------
# format_classification
# ---------------------------------------------------------------------------

class TestFormatClassification:
    """Table-driven expectations for format_classification."""

    @pytest.mark.parametrize(
        "input_val,expected",
        [
            ("natural_language", "Natural Language"),  # underscore -> space, title case
            ("UPPER", "Upper"),                        # all-caps input is re-cased
            ("", ""),                                  # empty string stays empty
            (None, ""),                                # None is expected to yield ""
            ("single", "Single"),                      # one word is capitalised
        ],
    )
    def test_format_classification(self, input_val, expected):
        """format_classification(input_val) must equal the tabulated expectation."""
        actual = format_classification(input_val)
        assert actual == expected


# ---------------------------------------------------------------------------
# strip_full_stop
# ---------------------------------------------------------------------------

class TestStripFullStop:
    """Table-driven expectations for strip_full_stop."""

    @pytest.mark.parametrize(
        "input_val,expected",
        [
            ("hello.", "hello"),      # trailing period removed
            ("hello", "hello"),       # no period: unchanged
            (".", ""),                # a lone period leaves an empty string
            ("", ""),                 # empty string stays empty
            (None, None),             # None is expected to pass through
            ("hello...", "hello.."),  # only the final period is removed
        ],
    )
    def test_strip_full_stop(self, input_val, expected):
        """strip_full_stop(input_val) must equal the tabulated expectation."""
        actual = strip_full_stop(input_val)
        assert actual == expected


# ---------------------------------------------------------------------------
# remove_parenthetical_content
# ---------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: Apache-2.0
Empty file.
Loading