Skip to content

Commit bcc233d

Browse files
author
notactuallyfinn
committed
implemented list comparison and added tests for it
1 parent f0f1818 commit bcc233d

File tree

2 files changed

+240
-30
lines changed

2 files changed

+240
-30
lines changed

src/hermes/model/types/ld_list.py

Lines changed: 217 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
# SPDX-FileContributor: Michael Meinel
66
# SPDX-FileContributor: Michael Fritzsche
77

8+
from collections import deque
89
from types import NotImplementedType
910
from .ld_container import (
1011
ld_container,
@@ -16,7 +17,7 @@
1617
BASIC_TYPE,
1718
)
1819

19-
from typing import Generator, Union, Self, Any
20+
from typing import Generator, Hashable, Union, Self, Any
2021

2122

2223
class ld_list(ld_container):
@@ -241,11 +242,16 @@ def __eq__(
241242
],
242243
) -> Union[bool, NotImplementedType]:
243244
"""
244-
Returns wheter or not self is considered to be equal to other.
245+
Returns whether or not self is considered to be equal to other.<br>
245246
If other is not an ld_list, it is converted first.
246247
For each index it is checked if the ids of the items at index in self and other match if both have one,
247-
if only one has an id all other values are compared.
248-
If self or other is considered unordered the comparison is more difficult and ...
248+
if only one has or neither have an id all other values are compared.<br>
249+
Note that due to those circumstances equality is not transitive
250+
meaning if a == b and b == c it is not guaranteed that a == c.<br>
251+
If self or other is considered unordered the comparison is more difficult. All items in self are compared
252+
with all items in other. On the resulting graph given by the relation == the Hopcroft-Karp algorithm is used
253+
to determine if there exists a bijection reordering self so that the ordered comparison of self with other
254+
returns true.
249255
250256
:param self: The ld_list other is compared to.
251257
:type self: Self
@@ -257,7 +263,6 @@ def __eq__(
257263
If other is of the wrong type return NotImplemented instead.
258264
:rtype: bool | NotImplementedType
259265
"""
260-
# TODO: ld_lists with container_type "@set" have to be considered unordered
261266
# check if other has an acceptable type
262267
if not (isinstance(other, (list, ld_list)) or ld_list.is_container(other)):
263268
return NotImplemented
@@ -281,33 +286,215 @@ def __eq__(
281286
# lists will only contain string
282287
return self.item_list == other.item_list
283288

284-
# check if at each index the items are considered equal
285-
for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)):
286-
# check if items are values
287-
if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and
288-
(ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))):
289-
if not ld_container.are_values_equal(item, other_item):
289+
if self.container_type == other.container_type == "@list":
290+
# check if at each index the items are considered equal
291+
for index, (item, other_item) in enumerate(zip(self.item_list, other.item_list)):
292+
# check if items are values
293+
if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and
294+
(ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))):
295+
if not ld_container.are_values_equal(item, other_item):
296+
return False
297+
continue
298+
# check if both contain an id and compare
299+
if "@id" in item and "@id" in other_item:
300+
if item["@id"] != other_item["@id"]:
301+
return False
302+
continue
303+
# get the 'real' items (i.e. can also be ld_dicts or ld_lists)
304+
item = self[index]
305+
other_item = other[index]
306+
# compare using the correct equals method
307+
res = item.__eq__(other_item)
308+
if res == NotImplemented:
309+
# swap order if first try returned NotImplemented
310+
res = other_item.__eq__(item)
311+
# return false if the second comparison also fails or one of them returned false
312+
if res is False or res == NotImplemented:
290313
return False
291-
continue
292-
# check if both contain an id and compare
293-
if "@id" in item and "@id" in other_item:
294-
if item["@id"] != other_item["@id"]:
314+
# return true because no unequal elements where found
315+
return True
316+
else:
317+
# check which items in self are equal the which in other
318+
equality_pairs = [[] for i in range(len(self))] # j in equality_pairs[i] <=> self[i] == other[j]
319+
for index, item in enumerate(self.item_list):
320+
for other_index, other_item in enumerate(other.item_list):
321+
# check if items are values
322+
if ((ld_container.is_typed_json_value(item) or ld_container.is_json_value(item)) and
323+
(ld_container.is_typed_json_value(other_item) or ld_container.is_json_value(other_item))):
324+
if ld_container.are_values_equal(item, other_item):
325+
equality_pairs[index] += [other_index]
326+
continue
327+
# check if both contain an id and compare
328+
if "@id" in item and "@id" in other_item:
329+
if item["@id"] == other_item["@id"]:
330+
equality_pairs[index] += [other_index]
331+
continue
332+
# get the 'real' items (i.e. can also be ld_dicts or ld_lists)
333+
item = self[index]
334+
other_item = other[index]
335+
# compare using the correct equals method
336+
res = item.__eq__(other_item)
337+
if res == NotImplemented:
338+
# swap order if first try returned NotImplemented
339+
res = other_item.__eq__(item)
340+
# if one of both comparisons returned true the elements are equal
341+
if res:
342+
equality_pairs[index] += [other_index]
343+
if len(equality_pairs[index]) == 0:
344+
# there exists no element in other that is equal to item
295345
return False
296-
continue
297-
# get the 'real' items (i.e. can also be ld_dicts or ld_lists)
298-
item = self[index]
299-
other_item = other[index]
300-
# compare using the correct equals method
301-
res = item.__eq__(other_item)
302-
if res == NotImplemented:
303-
# swap order if first try returned NotImplemented
304-
res = other_item.__eq__(item)
305-
# return false if the second comparison also fails or one of them returned false
306-
if res is False or res == NotImplemented:
307-
return False
308-
309-
# return true because no unequal elements where found
310-
return True
346+
# check if there is a way to chose one index from equality_pairs[i] for every i
347+
# so that there are no two i's with the same chosen index.
348+
# If such a way exists self and other are considered equal. If not they are considered to be not equal.
349+
# solved via a Hopcroft-Karp algorithm variant:
350+
# The bipartite graph is the disjoint union of the vertices 1 to len(self) and
351+
# freely chosen ids for each list in equality_pairs.
352+
# The graph has an edge from i to the id of a list if i is contained in the list.
353+
item_count = len(self)
354+
verticies_set1 = {*range(item_count)}
355+
verticies_set2 = {*range(item_count, 2 * item_count)}
356+
edges = {i: tuple(j for j in verticies_set2 if i in equality_pairs[j - item_count]) for i in verticies_set1}
357+
return ld_list._hopcroft_karp(verticies_set1, verticies_set2, edges) == len(self)
358+
359+
@classmethod
360+
def _bfs_step(
361+
cls: Self, verticies1: set[Hashable], edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable],
362+
distances: dict[Hashable, Union[int, float]]
363+
) -> bool:
364+
"""
365+
Completes the BFS step of Hopcroft-Karp. I.e.:<br>
366+
Finds the shortest path from all unmatched verticies in verticies1 to any unmatched vertex in any value in edges
367+
where the connecting paths are alternating between matches and its complement.<br>
368+
It also marks each vertex in verticies1 with how few verticies from verticies1 have to be passed
369+
to reach the vertex from an unmatched one in verticies1. This is stored in distances.
370+
371+
:param verticies1: The set of verticies in the left partition of the bipartite graph.
372+
:type verticies1: set[Hashable]
373+
:param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in
374+
this format: Dictionary with keys being the vertices in the left partition and values being tuples
375+
of verticies in the right partition.)
376+
:type edges: dict[Hashable, tuple[Hashable]]
377+
:param matches: The current matching of verticies in the left partition with the ones in the right partition.
378+
:type matches: dict[Hashable, Hashable]
379+
:param distances: The reference to the dictionary mapping verticies of the left partition to the minimal
380+
number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left
381+
partition to the vertex that is the key.
382+
:type distances: dict[Hashable, Union[int, float]]
383+
384+
:returns: Wheter or not a alternating path from an unmatched vertex in the left partition to an unmatched vertex
385+
in the right partition exists.
386+
:rtype: bool
387+
"""
388+
# initialize the queue and set the distances to zero for unmatched vertices and to inf for all others
389+
queue = deque()
390+
for ver in verticies1:
391+
if matches[ver] is None:
392+
distances[ver] = 0
393+
queue.append(ver)
394+
else:
395+
distances[ver] = float("inf")
396+
distances[None] = float("inf")
397+
# begin BFS
398+
while len(queue) != 0:
399+
ver1 = queue.popleft()
400+
# if the current vertex has a distance less then the current minimal one from an unmatched vertex in the
401+
# left partition to an unmatched one in the right partition
402+
if distances[ver1] < distances[None]:
403+
# iterate over all vertices in the right partition connected to ver1
404+
for ver2 in edges[ver1]:
405+
# if the vertex ver2 is matched with (or None if not matched) wasn't visited yet
406+
if distances[matches[ver2]] == float("inf"):
407+
# initialize the distance and queue the vertex for further search
408+
distances[matches[ver2]] = distances[ver1] + 1
409+
queue.append(matches[ver2])
410+
# if a path to None i.e. an unmatched vertex in the right partition was found return true otherwise false
411+
return distances[None] != float("inf")
412+
413+
@classmethod
414+
def _dfs_step(
415+
cls: Self, ver: Hashable, edges: dict[Hashable, tuple[Hashable]], matches: dict[Hashable, Hashable],
416+
distances: dict[Hashable, Union[int, float]]
417+
) -> bool:
418+
"""
419+
Completes the DFS step of Hopcroft-Karp. I.e.:<br>
420+
Adds all edges on every path with the minimal path length to matches if they would be in the symmetric
421+
difference of matches and the set of edges on the union of the paths.
422+
423+
:param ver: The set of verticies in the left partition of the bipartite graph.
424+
:type vert: Hashable
425+
:param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in
426+
this format: Dictionary with keys being the vertices in the left partition and values being tuples
427+
of verticies in the right partition.)
428+
:type edges: dict[Hashable, tuple[Hashable]]
429+
:param matches: The current matching of verticies in the left partition with the ones in the right partition.
430+
:type matches: dict[Hashable, Hashable]
431+
:param distances: The reference to the dictionary mapping verticies of the left partition to the minimal
432+
number of verticies in the left partition that will be passed on a path from an unmatched vertex of the left
433+
partition to the vertex that is the key. The values will be replaced with float("inf") to mark already
434+
visited vertices.
435+
:type distances: dict[Hashable, Union[int, float]]
436+
437+
:returns: Wheter or not a path from the unmatched vertex ver in the left partition to an unmatched vertex
438+
in the right partition could still exist.
439+
:rtype: bool
440+
"""
441+
# recursion base case: None always has a shortest possible path to itself
442+
if ver is None:
443+
return True
444+
# iterate over all vertices connected to ver in the right partition
445+
for ver2 in edges[ver]:
446+
# if ver2 is on a path with minimal length and not all subtrees have been searched already
447+
if distances[matches[ver2]] == distances[ver] + 1:
448+
if cls._dfs_step(matches[ver], edges, matches, distances):
449+
# add the edge to the matches and return true
450+
matches[ver2] = ver
451+
matches[ver] = ver2
452+
return True
453+
# mark this vertex as completly searched
454+
distances[ver] = float("inf")
455+
return False
456+
457+
@classmethod
458+
def _hopcroft_karp(
459+
cls: Self, verticies1: set[Hashable], verticies2: set[Hashable], edges: dict[Hashable, tuple[Hashable]]
460+
) -> int:
461+
"""
462+
Implementation of Hopcroft-Karp. I.e.:<br>
463+
Finds how maximal number of edges with the property that no two edges share an endpoint (and startpoint)
464+
in the given bipartite graph.<br>
465+
Note that verticies1 and verticies2 have to be disjoint.
466+
467+
:param verticies1: The set of verticies in the left partition of the bipartite graph.
468+
:type verticies1: set[Hashable]
469+
:param verticies2: The set of verticies in the right partition of the bipartite graph.
470+
:type verticies2: set[Hashable]
471+
:param edges: The edges in the bipartite graph. (As the edges are bidirectional they are expected to be given in
472+
this format: Dictionary with keys being the vertices in the left partition and values being tuples
473+
of verticies in the right partition.)
474+
:type edges: dict[Hashable, tuple[Hashable]]
475+
476+
:returns: The number of edges.
477+
:rtype: int
478+
"""
479+
# initializes the first matching. None is a imaginary vertex to denote unmatched vertices.
480+
matches = dict()
481+
for ver in verticies1:
482+
matches[ver] = None
483+
for ver in verticies2:
484+
matches[ver] = None
485+
matching_size = 0
486+
distances = dict()
487+
while cls._bfs_step(verticies1, edges, matches, distances):
488+
# while a alternating path from an unmatched vertex in the left partition exits
489+
# recalculate the distances and
490+
# iterate over all unmatched vertices in the left partition.
491+
for ver in verticies1:
492+
if matches[ver] is None:
493+
# create the new matches dict and if a new edge was added increase the size of the matching
494+
if cls._dfs_step(ver, edges, matches, distances):
495+
matching_size += 1
496+
# return the size of the matching
497+
return matching_size
311498

312499
def __ne__(
313500
self: Self,

test/hermes_test/model/types/test_ld_list.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,29 @@ def test_build_in_comparison():
197197
assert li != li2
198198
li[0] = {"@type": "schema:foobar", "@value": "bar"}
199199
assert li != li2
200+
li = ld_list([], key="https://schema.org/name", context=[{"schema": "https://schema.org/"}])
201+
li2 = ld_list([{"@list": []}], key="https://schema.org/name", context=[{"schema2": "https://schema.org/"}])
202+
li.extend(["foo", "bar"])
203+
li2.extend(["bar", "foo"])
204+
assert li == li2
205+
li.append("bar")
206+
li2.append("foo")
207+
assert li != li2
208+
209+
210+
def test_hopcroft_karp():
    # integer vertices: the left vertices 0, 1, 3 and 4 only reach the three right vertices 10, 11 and 14,
    # so one of them has to stay unmatched
    left = {0, 1, 2, 3, 4}
    right = {10, 11, 12, 13, 14}
    adjacency = {0: (10, 11), 1: (10, 14), 2: (12, 13), 3: (10, 14), 4: (11,)}
    assert ld_list._hopcroft_karp(left, right, adjacency) == 4
    # an additional edge from 4 to 13 is expected to allow matching all five left vertices
    adjacency[4] = (11, 13)
    assert ld_list._hopcroft_karp(left, right, adjacency) == 5
    # the vertices of the right partition only have to be hashable, here they are tuples
    left = {0, 1, 2, 3, 4}
    right = {(0, 1, 3), (0, 4), (2,), (2, 4), (1, 3)}
    adjacency = {
        0: ((0, 1, 3), (0, 4)),
        1: ((0, 1, 3), (1, 3)),
        2: ((2,), (2, 4)),
        3: ((0, 1, 3), (1, 3)),
        4: ((0, 4), (2, 4)),
    }
    assert ld_list._hopcroft_karp(left, right, adjacency) == 5
200223

201224

202225
def test_extend():

0 commit comments

Comments
 (0)