diff --git a/src/iter.php b/src/iter.php index 52e0cef..c6e9e0f 100644 --- a/src/iter.php +++ b/src/iter.php @@ -85,6 +85,38 @@ function map(callable $function, $iterable) { } } +/** + * Leaves only unique occurrences by using a provided hash function. + * + * If no hash function is provided, values of the iterable will be serialized and used for comparison. Using serialize + * as a hash function can require more memory than other more efficient hash functions but it prevents possible + * false positives if there are hash collisions. + * + * @param array|Traversable $iterable Iterable to remove duplicates from + * @param callable|null $hashFunction Hash function that returns the value which will be used to determine + * uniqueness of the element + * @return \Iterator Yields the first element seen for each distinct hash, keeping its original key + */ +function unique($iterable, callable $hashFunction = null) { + _assertIterable($iterable, 'First argument'); + $hashSet = []; + foreach ($iterable as $key => $value) { + if ($hashFunction === null) { + $hash = serialize($value); + } else { + $hash = $hashFunction($value); + } + + if (isset($hashSet[$hash])) { + continue; + } + + $hashSet[$hash] = ''; /* stored value is irrelevant; only key presence is tested via isset() */ + + yield $key => $value; + } +} + /** * Applies a mapping function to all keys of an iterator. * diff --git a/test/iterTest.php b/test/iterTest.php index bed5133..a8307a6 100644 --- a/test/iterTest.php +++ b/test/iterTest.php @@ -44,6 +44,53 @@ public function testMap() { $this->assertSame([0, 3, 6, 9, 12, 15], toArray($mapped)); } + public function testUniqueWithoutHashFunction() { + $iterable = [1, 2, '2', '2', 3, 4, 4, null, null, 5, '', '', [1], [1], [2]]; + $expected = [1, 2, '2', 3, 4, null, 5, '', [1], [2]]; + $unique = unique($iterable, null); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueStringsWithHashFunction() { + $iterable = [ + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Proin tincidunt mollis dui id efficitur. 
Vivamus vitae tortor vitae velit imperdiet finibus vel eu lacus.', + ]; + $expected = [ + 'Lorem ipsum dolor sit amet, consectetur adipiscing elit.', + 'Proin tincidunt mollis dui id efficitur. Vivamus vitae tortor vitae velit imperdiet finibus vel eu lacus.', + ]; + $unique = unique($iterable, function ($v) { + return crc32($v); + }); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueObjectsWithHashFunction() { + $obj1 = new \stdClass(); + $obj1->a = 1; + $obj2 = new \stdClass(); + $obj2->a = 2; + $iterable = [$obj1, $obj1, $obj2]; + $expected = [$obj1, $obj2]; + $unique = unique($iterable, function ($v) { + return $v->a; + }); + $this->assertSame($expected, toArray($unique)); + } + + public function testUniqueObjectsWithoutHashFunction() { + $obj1 = new \stdClass(); + $obj1->a = 1; + $obj2 = new \stdClass(); + $obj2->a = 2; + $iterable = [$obj1, $obj1, $obj2]; + $expected = [$obj1, $obj2]; + $unique = unique($iterable); + $this->assertSame($expected, toArray($unique)); + } + public function testMapKeys() { $range = range(0, 5); $mapped = mapKeys(function($n) { return $n * 3; }, $range);