diff --git a/.gitignore b/.gitignore
index 7dfea74f..90a24ee0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -32,6 +32,7 @@
 
 # build dir
 /build/
+/build_debug/
 
 # editor configuration
 /.vscode/
diff --git a/include/chopper/configuration.hpp b/include/chopper/configuration.hpp
index bae58feb..4d8e80ff 100644
--- a/include/chopper/configuration.hpp
+++ b/include/chopper/configuration.hpp
@@ -20,8 +20,21 @@
 namespace chopper
 {
 
+enum class partitioning_scheme : uint8_t
+{
+    blocked,       // 0
+    sorted,        // 1
+    folded,        // 2
+    weighted_fold, // 3
+    similarity,    // 4
+    lsh,           // 5
+    lsh_sim        // 6
+};
+
 struct configuration
 {
+    partitioning_scheme partitioning_approach{partitioning_scheme::lsh_sim};
+
     /*!\name General Configuration
      * \{
      */
@@ -77,6 +90,10 @@ struct configuration
     mutable seqan::hibf::concurrent_timer union_estimation_timer{};
     mutable seqan::hibf::concurrent_timer rearrangement_timer{};
     mutable seqan::hibf::concurrent_timer dp_algorithm_timer{};
+    mutable seqan::hibf::concurrent_timer lsh_algorithm_timer{};
+    mutable seqan::hibf::concurrent_timer search_partition_algorithm_timer{};
+    mutable seqan::hibf::concurrent_timer intital_partition_timer{};
+    mutable seqan::hibf::concurrent_timer small_layouts_timer{};
 
     void read_from(std::istream & stream);
 
diff --git a/include/chopper/layout/determine_split_bins.hpp b/include/chopper/layout/determine_split_bins.hpp
new file mode 100644
index 00000000..91400e42
--- /dev/null
+++ b/include/chopper/layout/determine_split_bins.hpp
@@ -0,0 +1,29 @@
+// --------------------------------------------------------------------------------------------------
+// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
+// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
+// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
+// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
+// --------------------------------------------------------------------------------------------------
+
+/*!\file
+ * \brief Provides chopper::determine_split_bins.
+ * \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
+ */
+
+#pragma once
+
+#include <vector>
+
+#include <chopper/configuration.hpp>
+
+namespace chopper::layout
+{
+
+std::pair<size_t, size_t> determine_split_bins(chopper::configuration const & config,
+                                               std::vector<size_t> const & positions,
+                                               std::vector<size_t> const & cardinalities,
+                                               size_t const num_technical_bins,
+                                               size_t const num_user_bins,
+                                               std::vector<std::vector<size_t>> & partitions);
+
+}
\ No newline at end of file
diff --git a/include/chopper/layout/execute.hpp b/include/chopper/layout/execute.hpp
index 52bfc3b7..00c4fa2e 100644
--- a/include/chopper/layout/execute.hpp
+++ b/include/chopper/layout/execute.hpp
@@ -13,12 +13,14 @@
 #include <chopper/configuration.hpp>
 
 #include <hibf/sketch/hyperloglog.hpp>
+#include <hibf/sketch/minhashes.hpp>
 
 namespace chopper::layout
 {
 
 int execute(chopper::configuration & config,
             std::vector<std::vector<std::string>> const & filenames,
-            std::vector<seqan::hibf::sketch::hyperloglog> const & sketches);
+            std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+            std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches);
 
 } // namespace chopper::layout
diff --git a/include/chopper/lsh.hpp b/include/chopper/lsh.hpp
new file mode 100644
index 00000000..caa83975
--- /dev/null
+++ b/include/chopper/lsh.hpp
@@ -0,0 +1,202 @@
+// --------------------------------------------------------------------------------------------------
+// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
+// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
+// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
+// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
+// --------------------------------------------------------------------------------------------------
+
+/*!\file
+ * \brief Provides chopper::adjust_seed.
+ * \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
+ */
+
+#pragma once
+
+#include <algorithm>
+
+namespace chopper
+{
+
+/*\brief foo
+ */
+struct Cluster
+{
+protected:
+    size_t representative_id{}; // representative id of the cluster; identifier;
+
+    std::vector<size_t> user_bins{}; // the user bins contained in thus cluster
+
+    std::optional<size_t> moved_id{std::nullopt}; // where this Clusters user bins where moved to
+
+public:
+    Cluster() = default;
+    Cluster(Cluster const &) = default;
+    Cluster(Cluster &&) = default;
+    Cluster & operator=(Cluster const &) = default;
+    Cluster & operator=(Cluster &&) = default;
+    ~Cluster() = default;
+
+    Cluster(size_t const id, size_t const user_bins_id) : representative_id{id}, user_bins({user_bins_id})
+    {}
+
+    Cluster(size_t const id) : Cluster{id, id}
+    {}
+
+    size_t id() const
+    {
+        return representative_id;
+    }
+
+    std::vector<size_t> const & contained_user_bins() const
+    {
+        return user_bins;
+    }
+
+    bool has_been_moved() const
+    {
+        return moved_id.has_value();
+    }
+
+    bool empty() const
+    {
+        return user_bins.empty();
+    }
+
+    size_t size() const
+    {
+        return user_bins.size();
+    }
+
+    bool is_valid(size_t id) const
+    {
+        bool const ids_equal = representative_id == id;
+        bool const properly_moved = has_been_moved() && empty();
+        bool const not_moved = !has_been_moved() && !empty();
+
+        return ids_equal && (properly_moved || not_moved);
+    }
+
+    size_t moved_to_cluster_id() const
+    {
+        assert(moved_id.has_value());
+        assert(is_valid(representative_id));
+        return moved_id.value();
+    }
+
+    void move_to(Cluster & target_cluster)
+    {
+        target_cluster.user_bins.insert(target_cluster.user_bins.end(), this->user_bins.begin(), this->user_bins.end());
+        this->user_bins.clear();
+        moved_id = target_cluster.id();
+    }
+
+    void sort_by_cardinality(std::vector<size_t> const & cardinalities)
+    {
+        std::ranges::sort(user_bins,
+                          [&cardinalities](auto const & v1, auto const & v2)
+                          {
+                              return cardinalities[v1] > cardinalities[v2];
+                          });
+    }
+};
+
+struct MultiCluster : Cluster
+{
+protected:
+    std::vector<std::vector<size_t>> user_bins{}; // the user bins contained in this cluster
+
+public:
+    MultiCluster() = default;
+    MultiCluster(MultiCluster const &) = default;
+    MultiCluster(MultiCluster &&) = default;
+    MultiCluster & operator=(MultiCluster const &) = default;
+    MultiCluster & operator=(MultiCluster &&) = default;
+    ~MultiCluster() = default;
+
+    MultiCluster(Cluster const & clust)
+    {
+        representative_id = clust.id();
+
+        if (clust.has_been_moved())
+            moved_id = clust.moved_to_cluster_id();
+        else
+            user_bins.push_back(clust.contained_user_bins());
+    }
+
+    std::vector<std::vector<size_t>> const & contained_user_bins() const
+    {
+        return user_bins;
+    }
+
+    // needs to be defined again because of redefinition of `user_bins`
+    bool empty() const
+    {
+        return user_bins.empty();
+    }
+
+    // needs to be defined again because of redefinition of `user_bins`
+    size_t size() const
+    {
+        return user_bins.size();
+    }
+
+    bool is_valid(size_t id) const
+    {
+        bool const ids_equal = representative_id == id;
+        bool const properly_moved = has_been_moved() && empty();
+        bool const not_moved = !has_been_moved() && !empty();
+
+        return ids_equal && (properly_moved || not_moved);
+    }
+
+    void move_to(MultiCluster & target_cluster)
+    {
+        target_cluster.user_bins.insert(target_cluster.user_bins.end(), this->user_bins.begin(), this->user_bins.end());
+        this->user_bins.clear();
+        moved_id = target_cluster.id();
+    }
+
+    // sort user bins within a Cluster by cardinality and the clusters themselves by size
+    void sort_by_cardinality(std::vector<size_t> const & cardinalities)
+    {
+        std::ranges::for_each(user_bins,
+                              [&cardinalities](auto & user_bin_cluster)
+                              {
+                                  std::ranges::sort(user_bin_cluster,
+                                                    [&cardinalities](auto const & v1, auto const & v2)
+                                                    {
+                                                        return cardinalities[v1] > cardinalities[v2];
+                                                    });
+                              });
+
+        std::ranges::sort(user_bins,
+                          [](auto const & c1, auto const & c2)
+                          {
+                              return c1.size() > c2.size();
+                          });
+    }
+};
+
+// A valid cluster is one that hasn't been moved but actually contains user bins
+// A valid cluster at position i is identified by the following equality: cluster[i].size() >= 1 && cluster[i][0] == i
+// A moved cluster is one that has been joined and thereby moved to another cluster
+// A moved cluster i is identified by the following: cluster[i].size() == 1 && cluster[i][0] != i
+// returns position of the representative cluster
+template <typename cluster_type>
+size_t LSH_find_representative_cluster(std::vector<cluster_type> const & clusters, size_t current_id)
+{
+    std::reference_wrapper<cluster_type const> representative = clusters[current_id];
+
+    assert(representative.get().is_valid(current_id));
+
+    while (representative.get().has_been_moved())
+    {
+        current_id = representative.get().moved_to_cluster_id();
+        representative = clusters[current_id]; // replace by next cluster
+        assert(representative.get().is_valid(current_id));
+    }
+
+    return current_id;
+}
+
+} // namespace chopper
diff --git a/src/chopper_layout.cpp b/src/chopper_layout.cpp
index 5fda4ddb..9df893a3 100644
--- a/src/chopper_layout.cpp
+++ b/src/chopper_layout.cpp
@@ -93,6 +93,7 @@ int chopper_layout(chopper::configuration & config, sharg::parser & parser)
 
     std::vector<std::vector<std::string>> filenames{};
     std::vector<seqan::hibf::sketch::hyperloglog> sketches{};
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
 
     if (input_is_a_sketch_file)
     {
@@ -106,6 +107,7 @@ int chopper_layout(chopper::configuration & config, sharg::parser & parser)
 
         filenames = std::move(sin.filenames); // No need to call check_filenames because the files are not read.
         sketches = std::move(sin.hll_sketches);
+        minHash_sketches = std::move(sin.minHash_sketches);
         validate_configuration(parser, config, sin.chopper_config);
     }
     else
@@ -128,17 +130,18 @@ int chopper_layout(chopper::configuration & config, sharg::parser & parser)
     if (!input_is_a_sketch_file)
     {
         config.compute_sketches_timer.start();
-        seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+        seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
         config.compute_sketches_timer.stop();
     }
 
-    exit_code |= chopper::layout::execute(config, filenames, sketches);
+    exit_code |= chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     if (!config.disable_sketch_output)
     {
         chopper::sketch::sketch_file sout{.chopper_config = config,
                                           .filenames = std::move(filenames),
-                                          .hll_sketches = std::move(sketches)};
+                                          .hll_sketches = std::move(sketches),
+                                          .minHash_sketches = std::move(minHash_sketches)};
         std::ofstream os{config.sketch_directory, std::ios::binary};
         cereal::BinaryOutputArchive oarchive{os};
         oarchive(sout);
@@ -151,11 +154,19 @@ int chopper_layout(chopper::configuration & config, sharg::parser & parser)
         output_stream << "sketching_in_seconds\t"
                       << "layouting_in_seconds\t"
                       << "union_estimation_in_seconds\t"
-                      << "rearrangement_in_seconds\n";
+                      << "rearrangement_in_seconds\t"
+                      << "lsh_in_seconds\t"
+                      << "intital_partition_timer_in_seconds\t"
+                      << "small_layouts_timer_in_seconds\t"
+                      << "search_best_p_in_seconds\n";
         output_stream << config.compute_sketches_timer.in_seconds() << '\t';
         output_stream << config.dp_algorithm_timer.in_seconds() << '\t';
         output_stream << config.union_estimation_timer.in_seconds() << '\t';
         output_stream << config.rearrangement_timer.in_seconds() << '\t';
+        output_stream << config.lsh_algorithm_timer.in_seconds() << '\t';
+        output_stream << config.intital_partition_timer.in_seconds() << '\t';
+        output_stream << config.small_layouts_timer.in_seconds() << '\t';
+        output_stream << config.search_partition_algorithm_timer.in_seconds() << '\n';
     }
 
     return exit_code;
diff --git a/src/layout/CMakeLists.txt b/src/layout/CMakeLists.txt
index 915c83d7..f2b16735 100644
--- a/src/layout/CMakeLists.txt
+++ b/src/layout/CMakeLists.txt
@@ -4,8 +4,14 @@ if (TARGET chopper::layout)
     return ()
 endif ()
 
-add_library (chopper_layout STATIC determine_best_number_of_technical_bins.cpp execute.cpp hibf_statistics.cpp
-                                   ibf_query_cost.cpp input.cpp output.cpp
+add_library (chopper_layout STATIC
+             determine_best_number_of_technical_bins.cpp
+             determine_split_bins.cpp
+             execute.cpp
+             hibf_statistics.cpp
+             ibf_query_cost.cpp
+             input.cpp
+             output.cpp
 )
 target_link_libraries (chopper_layout PUBLIC chopper::shared)
 add_library (chopper::layout ALIAS chopper_layout)
diff --git a/src/layout/determine_split_bins.cpp b/src/layout/determine_split_bins.cpp
new file mode 100644
index 00000000..fb4ba653
--- /dev/null
+++ b/src/layout/determine_split_bins.cpp
@@ -0,0 +1,153 @@
+// ---------------------------------------------------------------------------------------------------
+// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
+// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
+// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
+// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
+// ---------------------------------------------------------------------------------------------------
+
+#include <numeric> // for allocator, string
+#include <vector>  // for vector
+
+#include <chopper/configuration.hpp>
+#include <chopper/layout/determine_split_bins.hpp>
+
+#include <hibf/layout/compute_fpr_correction.hpp>
+#include <hibf/layout/print_matrix.hpp> // for data_store
+#include <hibf/misc/divide_and_ceil.hpp>
+
+namespace chopper::layout
+{
+
+std::pair<size_t, size_t> determine_split_bins(chopper::configuration const & config,
+                                               std::vector<size_t> const & positions,
+                                               std::vector<size_t> const & cardinalities,
+                                               size_t const num_technical_bins,
+                                               size_t const num_user_bins,
+                                               std::vector<std::vector<size_t>> & partitions)
+{
+    if (num_user_bins == 0)
+        return {0, 0};
+
+    assert(num_technical_bins > 0u);
+    assert(num_user_bins > 0u);
+    assert(num_user_bins <= num_technical_bins);
+
+    auto const fpr_correction =
+        seqan::hibf::layout::compute_fpr_correction({.fpr = config.hibf_config.maximum_fpr, //
+                                                     .hash_count = config.hibf_config.number_of_hash_functions,
+                                                     .t_max = num_technical_bins});
+
+    std::vector<std::vector<size_t>> matrix(num_technical_bins); // rows
+    for (auto & v : matrix)
+        v.resize(num_user_bins, std::numeric_limits<size_t>::max()); // columns
+
+    std::vector<std::vector<size_t>> trace(num_technical_bins); // rows
+    for (auto & v : trace)
+        v.resize(num_user_bins, std::numeric_limits<size_t>::max()); // columns
+
+    size_t const extra_bins = num_technical_bins - num_user_bins + 1;
+
+    // initialize first column (first row is initialized with inf)
+    double const ub_cardinality = static_cast<double>(cardinalities[positions[0]]);
+    for (size_t i = 0; i < extra_bins; ++i)
+    {
+        size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * fpr_correction[i + 1]);
+        matrix[i][0] = seqan::hibf::divide_and_ceil(corrected_ub_cardinality, i + 1u);
+    }
+
+    // we must iterate column wise
+    for (size_t j = 1; j < num_user_bins; ++j)
+    {
+        double const ub_cardinality = static_cast<double>(cardinalities[positions[j]]);
+
+        for (size_t i = j; i < j + extra_bins; ++i)
+        {
+            size_t minimum{std::numeric_limits<size_t>::max()};
+
+            for (size_t i_prime = j - 1; i_prime < i; ++i_prime)
+            {
+                size_t const corrected_ub_cardinality =
+                    static_cast<size_t>(ub_cardinality * fpr_correction[(i - i_prime)]);
+                size_t score = std::max<size_t>(seqan::hibf::divide_and_ceil(corrected_ub_cardinality, i - i_prime),
+                                                matrix[i_prime][j - 1]);
+
+                // std::cout << "j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;
+
+                minimum = (score < minimum) ? (trace[i][j] = i_prime, score) : minimum;
+            }
+
+            matrix[i][j] = minimum;
+        }
+    }
+
+    // seqan::hibf::layout::print_matrix(matrix, num_technical_bins, num_user_bins, std::numeric_limits<size_t>::max());
+    //seqan::hibf::layout::print_matrix(trace, num_technical_bins, num_user_bins, std::numeric_limits<size_t>::max());
+
+    // backtracking
+    // first, in the last column, find the row with minimum score (it can happen that the last rows are equally good)
+    // the less rows, the better
+    size_t trace_i{num_technical_bins - 1};
+    size_t best_score{std::numeric_limits<size_t>::max()};
+    for (size_t best_i{0}; best_i < num_technical_bins; ++best_i)
+    {
+        if (matrix[best_i][num_user_bins - 1] < best_score)
+        {
+            best_score = matrix[best_i][num_user_bins - 1];
+            trace_i = best_i;
+        }
+    }
+    size_t const number_of_split_tbs{trace_i + 1}; // trace_i is the position. So +1 for the number
+
+    // now that we found the best trace_i start usual backtracking
+    size_t trace_j = num_user_bins - 1;
+
+    // size_t max_id{};
+    size_t max_size{};
+
+    size_t bin_id{};
+
+    while (trace_j > 0)
+    {
+        size_t next_i = trace[trace_i][trace_j];
+        size_t const number_of_bins = (trace_i - next_i);
+        size_t const cardinality = cardinalities[positions[trace_j]];
+        size_t const corrected_cardinality = static_cast<size_t>(cardinality * fpr_correction[number_of_bins]);
+        size_t const cardinality_per_bin = seqan::hibf::divide_and_ceil(corrected_cardinality, number_of_bins);
+
+        if (cardinality_per_bin > max_size)
+        {
+            // max_id = bin_id;
+            max_size = cardinality_per_bin;
+        }
+
+        for (size_t splits{0}; splits < number_of_bins; ++splits)
+        {
+            partitions[partitions.size() - 1 - bin_id].push_back(positions[trace_j]);
+            ++bin_id;
+        }
+
+        trace_i = trace[trace_i][trace_j];
+        --trace_j;
+    }
+    ++trace_i; // because we want the length not the index. Now trace_i == number_of_bins
+    size_t const cardinality = cardinalities[positions[0]];
+    size_t const corrected_cardinality = static_cast<size_t>(cardinality * fpr_correction[trace_i]);
+    // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
+    size_t const cardinality_per_bin = seqan::hibf::divide_and_ceil(corrected_cardinality, trace_i);
+
+    if (cardinality_per_bin > max_size)
+    {
+        // max_id = bin_id;
+        max_size = cardinality_per_bin;
+    }
+
+    for (size_t splits{0}; splits < trace_i; ++splits)
+    {
+        partitions[partitions.size() - 1 - bin_id].push_back(positions[0]);
+        ++bin_id;
+    }
+
+    return {number_of_split_tbs, max_size};
+}
+
+} // namespace chopper::layout
diff --git a/src/layout/execute.cpp b/src/layout/execute.cpp
index 6630447e..e4931196 100644
--- a/src/layout/execute.cpp
+++ b/src/layout/execute.cpp
@@ -5,6 +5,7 @@
 // shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
 // ---------------------------------------------------------------------------------------------------
 
+#include <algorithm>
 #include <cassert>
 #include <cinttypes>
 #include <cmath>
@@ -12,6 +13,9 @@
 #include <filesystem>
 #include <fstream>
 #include <iostream>
+#include <numeric>
+#include <optional>
+#include <random>
 #include <stdexcept>
 #include <string>
 #include <tuple>
@@ -19,60 +23,1200 @@
 
 #include <chopper/configuration.hpp>
 #include <chopper/layout/determine_best_number_of_technical_bins.hpp>
+#include <chopper/layout/determine_split_bins.hpp>
 #include <chopper/layout/execute.hpp>
 #include <chopper/layout/hibf_statistics.hpp>
 #include <chopper/layout/output.hpp>
+#include <chopper/lsh.hpp>
 #include <chopper/next_multiple_of_64.hpp>
+#include <chopper/sketch/output.hpp>
 
+#include <hibf/contrib/robin_hood.hpp>
+#include <hibf/layout/compute_fpr_correction.hpp>
 #include <hibf/layout/compute_layout.hpp>
+#include <hibf/layout/compute_relaxed_fpr_correction.hpp> // for compute_relaxed_fpr_correction
 #include <hibf/layout/layout.hpp>
+#include <hibf/misc/divide_and_ceil.hpp>
 #include <hibf/misc/iota_vector.hpp>
 #include <hibf/sketch/estimate_kmer_counts.hpp> // for estimate_kmer_counts
 #include <hibf/sketch/hyperloglog.hpp>
+#include <hibf/sketch/toolbox.hpp>
 
 namespace chopper::layout
 {
 
+uint64_t lsh_hash_the_sketch(std::vector<uint64_t> const & sketch, size_t const number_of_hashes_to_consider)
+{
+    return std::reduce(sketch.begin(), sketch.begin() + number_of_hashes_to_consider);
+}
+
+template <typename cluster_type>
+auto LSH_fill_hashtable(std::vector<cluster_type> const & clusters,
+                        std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                        size_t const current_sketch_index,
+                        size_t const current_number_of_sketch_hashes)
+{
+    static constexpr bool is_multi_cluster = std::same_as<cluster_type, MultiCluster>;
+    static_assert(std::same_as<cluster_type, Cluster> || std::same_as<cluster_type, MultiCluster>);
+
+    robin_hood::unordered_flat_map<uint64_t, std::vector<size_t>> table;
+
+    [[maybe_unused]] size_t processed_user_bins{0}; // only for sanity check
+
+    auto get_user_bin_idx = [](cluster_type const & cluster)
+    {
+        if constexpr (is_multi_cluster)
+            return std::views::join(cluster.contained_user_bins());
+        else
+            return cluster.contained_user_bins();
+    };
+
+    for (size_t pos = 0; pos < clusters.size(); ++pos)
+    {
+        auto const & current = clusters[pos];
+        assert(current.is_valid(pos));
+
+        if (current.has_been_moved()) // cluster has been moved somewhere else, don't process
+            continue;
+
+        for (size_t const user_bin_idx : get_user_bin_idx(current))
+        {
+            ++processed_user_bins;
+            uint64_t const key = lsh_hash_the_sketch(minHash_sketches[user_bin_idx].table[current_sketch_index],
+                                                     current_number_of_sketch_hashes);
+            table[key].push_back(current.id()); // insert representative for all user bins
+        }
+    }
+    assert(processed_user_bins == clusters.size()); // all user bins should've been processed by one of the clusters
+
+    // uniquify list. Since I am inserting representative_idx's into the table, the same number can
+    // be inserted into multiple splots, and multiple times in the same slot.
+    for (auto & [key, list] : table)
+    {
+        std::ranges::sort(list);
+        auto const ret = std::ranges::unique(list);
+        list.erase(ret.begin(), ret.end());
+    }
+
+    return table;
+}
+
+// minHash_sketches data structure:
+// Vector L1 : number of user bins
+// Vector L2 : number_of_max_minHash_sketches (LSH ADD+OR parameter b)
+// Vector L3 : minHash_sketch_size (LSH ADD+OR parameter r)
+
+std::vector<Cluster> very_similar_LSH_partitioning(std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                                                   std::vector<size_t> const & positions,
+                                                   std::vector<size_t> const & cardinalities,
+                                                   std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                                                   size_t const average_technical_bin_size,
+                                                   chopper::configuration const & config)
+{
+    assert(!minHash_sketches.empty());
+    assert(!minHash_sketches[0].table.empty());
+    assert(!minHash_sketches[0].table[0].empty());
+
+    size_t const number_of_user_bins{positions.size()};
+    assert(number_of_user_bins <= minHash_sketches.size());
+    size_t const number_of_max_minHash_sketches{3}; // LSH ADD+OR parameter b
+    // size_t const minHash_sketch_size{minHash_sketches[0].table[0].size()};   // LSH ADD+OR parameter r
+    size_t const minHash_sketch_size{5}; // LSH ADD+OR parameter r
+    seqan::hibf::sketch::hyperloglog const empty_sketch{config.hibf_config.sketch_bits};
+    // std::cout << "sketch size available: " << minHash_sketches[0].table[0].size() << std::endl;
+    // std::cout << "sketch size used here: " << minHash_sketch_size << std::endl;
+    // initialise clusters with a signle user bin per cluster.
+    // clusters are either
+    // 1) of size 1; containing an id != position where the id points to the cluster it has been moved to
+    //    e.g. cluster[Y] = {Z} (Y has been moved into Z, so Z could look likes this cluster[Z] = {Z, Y})
+    // 2) of size >= 1; with the first entry beging id == position (a valid cluster)
+    //    e.g. cluster[X] = {X}       // valid singleton
+    //    e.g. cluster[X] = {X, a, b, c, ...}   // valid cluster with more joined entries
+    // The clusters could me moved recursively, s.t.
+    // cluster[A] = {B}
+    // cluster[B] = {C}
+    // cluster[C] = {C, A, B} // is valid cluster since cluster[C][0] == C; contains A and B
+    std::vector<Cluster> clusters;
+    clusters.reserve(number_of_user_bins);
+
+    std::vector<size_t> current_cluster_cardinality(number_of_user_bins);
+    std::vector<seqan::hibf::sketch::hyperloglog> current_cluster_sketches(number_of_user_bins, empty_sketch);
+    size_t current_max_cluster_size{0};
+    size_t current_number_of_sketch_hashes{minHash_sketch_size}; // start with high r but decrease it iteratively
+    size_t current_sketch_index{0};
+
+    for (size_t pos = 0; pos < number_of_user_bins; ++pos)
+    {
+        clusters.emplace_back(pos, positions[pos]);
+        current_cluster_cardinality[pos] = cardinalities[positions[pos]];
+        current_cluster_sketches[pos] = sketches[positions[pos]];
+        current_max_cluster_size = std::max(current_max_cluster_size, cardinalities[positions[pos]]);
+    }
+
+    auto get_cluster = [&clusters](size_t const current) -> Cluster &
+    {
+        size_t const cluster_id = LSH_find_representative_cluster(clusters, current);
+        return clusters[cluster_id];
+    };
+
+    // refine clusters
+    //std::cout << "Start clustering with threshold average_technical_bin_size: " << average_technical_bin_size << std::endl;
+    while (current_max_cluster_size < average_technical_bin_size
+           && /*number_of_clusters / static_cast<double>(number_of_user_bins) > 0.5 &&*/
+           current_sketch_index < number_of_max_minHash_sketches) // I want to cluster 10%?
+    {
+        // fill LSH collision hashtable
+        robin_hood::unordered_flat_map<uint64_t, std::vector<size_t>> table =
+            LSH_fill_hashtable(clusters, minHash_sketches, current_sketch_index, current_number_of_sketch_hashes);
+
+        // read out LSH collision hashtable
+        // for each present key, if the list contains more than one cluster, we merge everything contained in the list
+        // into the first cluster, since those clusters collide and should be joined in the same bucket
+        for (auto & [key, list] : table)
+        {
+            assert(!list.empty());
+
+            if (list.size() <= 1) // nothing to do here
+                continue;
+
+            // Now combine all clusters into the first.
+
+            // 1) find the representative cluster to merge everything else into
+            // It can happen, that the representative has already been joined with another cluster
+            // e.g.
+            // [key1] = {0,11}  // then clusters[11] is merged into clusters[0]
+            // [key2] = {11,13} // now I want to merge clusters[13] into clusters[11] but the latter has been moved
+            auto & representative_cluster = get_cluster(list[0]);
+            assert(representative_cluster.id() == clusters[representative_cluster.id()].id());
+
+            auto & representative_cluster_sketch = current_cluster_sketches[representative_cluster.id()];
+
+            for (size_t const current : std::views::drop(list, 1))
+            {
+                // For every other entry in the list, it can happen that I already joined that list with another
+                // e.g.
+                // [key1] = {0,11}  // then clusters[11] is merged into clusters[0]
+                // [key2] = {0, 2, 11} // now I want to do it again
+                auto & next_cluster = get_cluster(current);
+
+                if (next_cluster.id() == representative_cluster.id()) // already joined
+                    continue;
+
+                next_cluster.move_to(representative_cluster); // otherwise join next_cluster into representative_cluster
+                assert(next_cluster.empty());
+                assert(next_cluster.has_been_moved());
+                assert(representative_cluster.size() > 1); // there should be at least two user bins now
+
+                representative_cluster_sketch.merge(current_cluster_sketches[next_cluster.id()]);
+            }
+
+            current_cluster_cardinality[representative_cluster.id()] = representative_cluster_sketch.estimate();
+        }
+        current_max_cluster_size = std::ranges::max(current_cluster_cardinality);
+
+        ++current_sketch_index;
+    }
+
+    return clusters;
+}
+
+void post_process_clusters(std::vector<Cluster> & clusters,
+                           std::vector<size_t> const & cardinalities,
+                           chopper::configuration const & config)
+{
+    // clusters are done. Start post processing
+    // since post processing involves re-ordering the clusters, the moved_to_cluster_id value of a cluster will not
+    // refer to the position of the cluster in the `clusters` vecto anymore but the cluster with the resprive id()
+    // would neet to be found
+    for (size_t pos = 0; pos < clusters.size(); ++pos)
+    {
+        assert(clusters[pos].is_valid(pos));
+        clusters[pos].sort_by_cardinality(cardinalities);
+    }
+
+    // push largest p clusters to the front
+    std::ranges::partial_sort(clusters,
+                              std::ranges::next(clusters.begin(), config.hibf_config.tmax, clusters.end()),
+                              [&cardinalities](auto const & v1, auto const & v2)
+                              {
+                                  // Note: If v2 is empty, so is v1.
+                                  if (v1.size() == v2.size() && !v2.empty())
+                                      return cardinalities[v1.contained_user_bins().front()]
+                                           > cardinalities[v2.contained_user_bins().front()];
+
+                                  return v1.size() > v2.size();
+                              });
+
+    // after filling up the partitions with the biggest clusters, sort the clusters by cardinality of the biggest ub
+    // s.t. that euqally sizes ub are assigned after each other and the small stuff is added at last.
+    // the largest ub is already at the start because of former sorting.
+    std::ranges::sort(std::ranges::next(clusters.begin(), config.hibf_config.tmax, clusters.end()),
+                      clusters.end(),
+                      [&cardinalities](auto const & v1, auto const & v2)
+                      {
+                          if (v1.empty())
+                              return false; // v1 can never be larger than v2 then
+
+                          if (v2.empty()) // and v1 is not, since the first if would catch
+                              return true;
+
+                          return cardinalities[v1.contained_user_bins().front()]
+                               > cardinalities[v2.contained_user_bins().front()];
+                      });
+
+    assert(clusters.size() < 2 || clusters[0].size() >= clusters[1].size()); // sanity check
+    // assert(cardinalities[clusters[std::min<size_t>(clusters.size(), config.hibf_config.tmax)].contained_user_bins()[0]] >= cardinalities[clusters[std::min<size_t>(clusters.size(), config.hibf_config.tmax) + 1].contained_user_bins()[0]]); // sanity check
+
+    // debug
+    for (size_t cidx = 1; cidx < clusters.size(); ++cidx)
+    {
+        // once empty - always empty; all empty clusters should be at the end
+        if (clusters[cidx - 1].empty() && !clusters[cidx].empty())
+            throw std::runtime_error{"sorting did not work"};
+    }
+    // debug
+}
+
+bool find_best_partition(chopper::configuration const & config,
+                         size_t const number_of_partitions,
+                         size_t & corrected_estimate_per_part,
+                         std::vector<size_t> const & cluster,
+                         std::vector<size_t> const & cardinalities,
+                         std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                         std::vector<std::vector<size_t>> & positions,
+                         std::vector<seqan::hibf::sketch::hyperloglog> & partition_sketches,
+                         std::vector<size_t> & max_partition_cardinality,
+                         std::vector<size_t> & min_partition_cardinality)
+{
+    seqan::hibf::sketch::hyperloglog const current_sketch = [&sketches, &cluster, &config]()
+    {
+        seqan::hibf::sketch::hyperloglog result{config.hibf_config.sketch_bits};
+
+        for (size_t const user_bin_idx : cluster)
+            result.merge(sketches[user_bin_idx]);
+
+        return result;
+    }();
+
+    size_t const max_card = [&cardinalities, &cluster]()
+    {
+        size_t max{0};
+
+        for (size_t const user_bin_idx : cluster)
+            max = std::max(max, cardinalities[user_bin_idx]);
+
+        return max;
+    }();
+
+    // TODO: afterwads check if I should again merge by how little the effective text ratio grows
+
+    // search best partition fit by similarity
+    // similarity here is defined as:
+    // "whose (<-partition) effective text size is subsumed most by the current user bin"
+    // or in other words:
+    // "which partition has the largest intersection with user bin b compared to its own (partition) size."
+    // double best_subsume_ratio{0.0};
+    size_t smallest_change{std::numeric_limits<size_t>::max()};
+    size_t best_p{0};
+    bool best_p_found{false};
+
+    auto penalty_lower_level = [&](size_t const additional_number_of_user_bins, size_t const p) -> size_t
+    {
+        size_t const min = std::min(max_card, min_partition_cardinality[p]);
+        size_t const max = std::max(max_card, max_partition_cardinality[p]);
+
+        if (positions[p].size() > config.hibf_config.tmax) // already a third level
+        {
+            // if there must already be another lower level because the current merged bin contains more than tmax
+            // user bins, then the current user bin is very likely stored multiple times. Therefore, the penalty is set
+            // to the cardinality of the current user bin times the number of levels, e.g. the number of times this user
+            // bin needs to be stored additionally
+            size_t const num_ubs_in_merged_bin{positions[p].size() + additional_number_of_user_bins};
+            double const levels = std::log(num_ubs_in_merged_bin) / std::log(config.hibf_config.tmax);
+            return static_cast<size_t>(max_card * levels);
+        }
+        else if (positions[p].size() + additional_number_of_user_bins > config.hibf_config.tmax) // now a third level
+        {
+            // if the current merged bin contains exactly tmax UBS, adding otherone must
+            // result in another lower level. Most likely, the smallest user bin will end up on the lower level
+            // therefore the penalty is set to 'min * tmax'
+            // of course, there could also be a third level with a lower number of user bins, but this is hard to
+            // estimate.
+            size_t const penalty = min * config.hibf_config.tmax;
+            return penalty;
+        }
+        else // positions[p].size() + additional_number_of_user_bins < tmax
+        {
+            // if the new user bin is smaller than all other already contained user bins
+            // the waste of space is high if stored in a single technical bin
+            if (max_card < min)
+                return (max - max_card);
+            // if the new user bin is bigger than all other already contained user bins, the IBF size increases
+            else if (max_card > max)
+                return (max_card - max) * config.hibf_config.tmax;
+            // else, end if-else-block and zero is returned
+        }
+
+        return 0u;
+    };
+
+    for (size_t p = 0; p < number_of_partitions; ++p)
+    {
+        seqan::hibf::sketch::hyperloglog union_sketch = current_sketch;
+        union_sketch.merge(partition_sketches[p]);
+        size_t const union_estimate = union_sketch.estimate();
+        size_t const current_partition_size = partition_sketches[p].estimate();
+
+        assert(union_estimate >= current_partition_size);
+        size_t const penalty_current_bin = union_estimate - current_partition_size;
+        size_t const penalty_current_ibf =
+            config.hibf_config.tmax
+            * ((union_estimate <= corrected_estimate_per_part) ? 0u : union_estimate - corrected_estimate_per_part);
+        size_t const change = penalty_current_bin + penalty_current_ibf + penalty_lower_level(cluster.size(), p);
+
+        // size_t const intersection = current_sketch.estimate() - change;
+        // double const subsume_ratio = static_cast<double>(intersection) / current_partition_size;
+        //std::cout << "p:" << p << " p-#UBs" << positions[p].size() << " penalty:" <<  penalty(cluster.size(), p) << " change:" << change << " union-current_p:" << (union_estimate - current_partition_size) << " union:" << union_estimate << " current_p:" << current_partition_size << " t:" << corrected_estimate_per_part << std::endl;
+        if (change == 0 || /* If there is no penalty at all, this is a best fit even if the partition is "full"*/
+            (smallest_change > change /*&& subsume_ratio > best_subsume_ratio &&*/
+             /*current_partition_size < corrected_estimate_per_part*/))
+        {
+            //std::cout << "smaller!" << std::endl;
+            // best_subsume_ratio = subsume_ratio;
+            smallest_change = change;
+            best_p = p;
+            best_p_found = true;
+        }
+    }
+
+    if (!best_p_found)
+        throw "currently there are no safety measures if a partition is not found because it is very unlikely";
+
+    //std::cout << "best_p:" << best_p << std::endl<< std::endl;
+
+    // now that we know which partition fits best (`best_p`), add those indices to it
+    for (size_t const user_bin_idx : cluster)
+    {
+        positions[best_p].push_back(user_bin_idx);
+        max_partition_cardinality[best_p] = std::max(max_partition_cardinality[best_p], cardinalities[user_bin_idx]);
+        min_partition_cardinality[best_p] = std::min(min_partition_cardinality[best_p], cardinalities[user_bin_idx]);
+    }
+    partition_sketches[best_p].merge(current_sketch);
+    corrected_estimate_per_part = std::max<size_t>(corrected_estimate_per_part, partition_sketches[best_p].estimate());
+
+    return true;
+}
+
+size_t split_bins(chopper::configuration const & config,
+                  std::vector<size_t> const & sorted_positions,
+                  std::vector<size_t> const & cardinalities,
+                  std::vector<std::vector<size_t>> & partitions,
+                  size_t const sum_of_cardinalities,
+                  size_t const end_idx)
+{
+    // assign split bins to the end. makes it easier to process merged bins afterwards
+    size_t pos{partitions.size() - 1};
+    for (size_t idx = 0; idx < end_idx; ++idx)
+    {
+        size_t const ub_idx{sorted_positions[idx]};
+        // Question: divide and ceil?
+        size_t const number_of_split_tbs =
+            std::max<size_t>(1u, config.hibf_config.tmax * cardinalities[ub_idx] / sum_of_cardinalities);
+        std::cout << "number_of_split_tbs: " << number_of_split_tbs
+                  << " cardinalities[ub_idx]:" << cardinalities[ub_idx]
+                  << " sum_of_cardinalities:" << sum_of_cardinalities << std::endl;
+        // fill partitions from behind to ensure an easier layouting
+        for (size_t i = 0; i < number_of_split_tbs; ++i)
+        {
+            assert(pos > 0);
+            assert(partitions[pos].empty());
+            partitions[pos].push_back(ub_idx);
+            --pos;
+        }
+    }
+    std::cout << "pos: " << pos << std::endl;
+    return pos + 1;
+}
+
+std::pair<size_t, size_t> find_bins_to_be_split(std::vector<size_t> const & sorted_positions,
+                                                std::vector<size_t> const & cardinalities,
+                                                size_t threshold,
+                                                size_t const max_bins)
+{
+    size_t idx{0};
+    size_t sum{0};
+    size_t number_of_split_bins = max_bins + 1; // initialise to something bigger to trigger while loop below
+
+    auto find_idx_and_sum = [&]()
+    {
+        while (idx < sorted_positions.size() && cardinalities[sorted_positions[idx]] > threshold)
+        {
+            sum += cardinalities[sorted_positions[idx]];
+            ++idx;
+        }
+    };
+
+    while (number_of_split_bins > max_bins)
+    {
+        idx = 0;
+        sum = 0;
+        find_idx_and_sum();
+        number_of_split_bins = sum / threshold;
+        // max(1.01, ...) because a tie would end in an endless loop.
+        threshold = static_cast<double>(threshold) * std::max(1.01, static_cast<double>(sum) / (threshold * max_bins));
+    }
+
+    // Maybe there are more user bins (position of idx) than available split bins.
+    // Thus, clamp the number of user bins to split to maximally the number_of_split_bins
+    idx = std::min(idx, number_of_split_bins);
+
+    return {idx, number_of_split_bins};
+}
+
+size_t lsh_sim_approach(chopper::configuration const & config,
+                        std::vector<size_t> const & sorted_positions2,
+                        std::vector<size_t> const & cardinalities,
+                        std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                        std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                        std::vector<std::vector<size_t>> & partitions,
+                        size_t const number_of_remaining_tbs,
+                        size_t const technical_bin_size_threshold,
+                        size_t const sum_of_cardinalities)
+{
+    uint8_t const sketch_bits{config.hibf_config.sketch_bits};
+    //std::cout << "LSH partitioning into " << config.hibf_config.tmax << std::endl;
+    std::vector<seqan::hibf::sketch::hyperloglog> partition_sketches(number_of_remaining_tbs,
+                                                                     seqan::hibf::sketch::hyperloglog(sketch_bits));
+
+    std::vector<size_t> max_partition_cardinality(number_of_remaining_tbs, 0u);
+    std::vector<size_t> min_partition_cardinality(number_of_remaining_tbs, std::numeric_limits<size_t>::max());
+
+    // initial partitioning using locality sensitive hashing (LSH)
+    config.lsh_algorithm_timer.start();
+    std::vector<Cluster> clusters = very_similar_LSH_partitioning(minHash_sketches,
+                                                                  sorted_positions2,
+                                                                  cardinalities,
+                                                                  sketches,
+                                                                  technical_bin_size_threshold,
+                                                                  config);
+    post_process_clusters(clusters, cardinalities, config);
+    config.lsh_algorithm_timer.stop();
+
+    std::ofstream ofs{"/tmp/final.clusters"};
+    for (size_t i = 0; i < clusters.size(); ++i)
+    {
+        seqan::hibf::sketch::hyperloglog sketch(config.hibf_config.sketch_bits);
+        for (size_t j = 0; j < clusters[i].size(); ++j)
+        {
+            sketch.merge(sketches[clusters[i].contained_user_bins()[j]]);
+        }
+        ofs << i << ":" << sketch.estimate() << ":" << clusters[i].size() << std::endl;
+    }
+
+    std::vector<std::vector<size_t>> remaining_clusters{};
+
+    // initialise partitions with the first p largest clusters (post_processing sorts by size)
+    size_t cidx{0}; // current cluster index
+    for (size_t p = 0; p < number_of_remaining_tbs; ++p)
+    {
+        assert(!clusters[cidx].empty());
+        auto const & cluster = clusters[cidx].contained_user_bins();
+        bool split_cluster = false;
+
+        if (cluster.size() > config.hibf_config.tmax)
+        {
+            size_t card{0};
+            for (size_t uidx = 0; uidx < cluster.size(); ++uidx)
+                card += cardinalities[cluster[uidx]];
+
+            if (card > 0.05 * sum_of_cardinalities / config.hibf_config.tmax)
+                split_cluster = true;
+        }
+
+        size_t end = (split_cluster) ? cluster.size() : std::min(cluster.size(), config.hibf_config.tmax);
+        for (size_t uidx = 0; uidx < end; ++uidx)
+        {
+            size_t const user_bin_idx = cluster[uidx];
+            // if a single cluster already exceeds the cardinality_per_part,
+            // then the remaining user bins of the cluster must spill over into the next partition
+            if ((uidx != 0 && (uidx % config.hibf_config.tmax == 0))
+                || partition_sketches[p].estimate() > technical_bin_size_threshold)
+            {
+                ++p;
+
+                if (p >= number_of_remaining_tbs)
+                {
+                    split_cluster = true;
+                    end = uidx;
+                    break;
+                }
+            }
+
+            partition_sketches[p].merge(sketches[user_bin_idx]);
+            partitions[p].push_back(user_bin_idx);
+            max_partition_cardinality[p] = std::max(max_partition_cardinality[p], cardinalities[user_bin_idx]);
+            min_partition_cardinality[p] = std::min(min_partition_cardinality[p], cardinalities[user_bin_idx]);
+        }
+
+        if (split_cluster)
+        {
+            std::vector<size_t> remainder(cluster.begin() + end, cluster.end());
+            remaining_clusters.insert(remaining_clusters.end(), remainder);
+        }
+
+        ++cidx;
+    }
+
+    for (size_t i = cidx; i < clusters.size(); ++i)
+    {
+        if (clusters[i].empty())
+            break;
+
+        remaining_clusters.insert(remaining_clusters.end(), clusters[i].contained_user_bins());
+    }
+
+    // assign the rest by similarity
+    size_t merged_threshold{technical_bin_size_threshold};
+    for (size_t ridx = 0; ridx < remaining_clusters.size(); ++ridx)
+    {
+        auto const & cluster = remaining_clusters[ridx];
+
+        config.search_partition_algorithm_timer.start();
+        find_best_partition(config,
+                            number_of_remaining_tbs,
+                            merged_threshold,
+                            cluster,
+                            cardinalities,
+                            sketches,
+                            partitions,
+                            partition_sketches,
+                            max_partition_cardinality,
+                            min_partition_cardinality);
+        config.search_partition_algorithm_timer.start();
+    }
+
+    // compute actual max size
+    size_t max_size{0};
+    for (auto const & sketch : partition_sketches)
+        max_size = std::max(max_size, (size_t)sketch.estimate());
+
+    return max_size;
+}
+
+void partition_user_bins(chopper::configuration const & config,
+                         std::vector<size_t> const & positions,
+                         std::vector<size_t> const & cardinalities,
+                         std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                         std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                         std::vector<std::vector<size_t>> & partitions)
+{
+    // all approaches need sorted positions
+    std::vector<size_t> const sorted_positions = [&positions, &cardinalities]()
+    {
+        std::vector<size_t> ps(positions.begin(), positions.end());
+        seqan::hibf::sketch::toolbox::sort_by_cardinalities(cardinalities, ps);
+        return ps;
+    }();
+
+    auto const [sum_of_cardinalities, max_cardinality, joint_estimate] = [&]()
+    {
+        size_t sum{0};
+        size_t max{0};
+        seqan::hibf::sketch::hyperloglog sketch{config.hibf_config.sketch_bits};
+
+        for (size_t const pos : positions)
+        {
+            sum += cardinalities[pos];
+            max = std::max(max, cardinalities[pos]);
+            sketch.merge(sketches[pos]);
+        }
+
+        return std::tuple<size_t, size_t, size_t>{sum, max, sketch.estimate()};
+    }();
+
+    // If the effective text size is very low, it can happen that the joint_estimate divided by the number of partitions
+    // is lower than the largest single user bin. But of course, we can never reach a smaller max technical bin size
+    // then that of the largest user user bin. Thus we can correct the estimate_per_part beforehand.
+    // This way we make sure there is at least 1 LSH clustering step.
+    [[maybe_unused]] size_t const estimate_per_part =
+        std::max(seqan::hibf::divide_and_ceil(joint_estimate, config.hibf_config.tmax), max_cardinality + 1);
+
+    double const relaxed_fpr_correction = seqan::hibf::layout::compute_relaxed_fpr_correction(
+        {.fpr = config.hibf_config.maximum_fpr, //
+         .relaxed_fpr = config.hibf_config.relaxed_fpr,
+         .hash_count = config.hibf_config.number_of_hash_functions});
+    //std::cout << "sum_of_cardinalities:" << sum_of_cardinalities << " joint_estimate:" << joint_estimate << std::endl;
+
+    size_t split_threshold{};
+    size_t idx{0}; // start in sorted positions
+    size_t number_of_split_tbs{0};
+    size_t number_of_merged_tbs{config.hibf_config.tmax};
+    size_t max_split_size{0};
+    size_t max_merged_size{0};
+
+    // split_threshold = seqan::hibf::divide_and_ceil(sum_of_cardinalities, config.hibf_config.tmax);
+    split_threshold = seqan::hibf::divide_and_ceil(joint_estimate, config.hibf_config.tmax);
+
+    auto parition_split_bins = [&]()
+    {
+        size_t number_of_potential_split_bins{0};
+        std::tie(idx, number_of_potential_split_bins) =
+            find_bins_to_be_split(sorted_positions, cardinalities, split_threshold, config.hibf_config.tmax - 1);
+        std::tie(number_of_split_tbs, max_split_size) =
+            chopper::layout::determine_split_bins(config,
+                                                  sorted_positions,
+                                                  cardinalities,
+                                                  number_of_potential_split_bins,
+                                                  idx,
+                                                  partitions);
+        number_of_merged_tbs = config.hibf_config.tmax - number_of_split_tbs;
+    };
+
+    auto partitions_merged_bins = [&]()
+    {
+        // determine number of split bins
+        std::vector<size_t> const sorted_positions2(sorted_positions.begin() + idx, sorted_positions.end());
+
+        // distribute the rest to merged bins
+        size_t const corrected_max_split_size = max_split_size / relaxed_fpr_correction;
+        size_t const merged_threshold = std::max(corrected_max_split_size, split_threshold);
+
+        max_merged_size = lsh_sim_approach(config,
+                                           sorted_positions2,
+                                           cardinalities,
+                                           sketches,
+                                           minHash_sketches,
+                                           partitions,
+                                           number_of_merged_tbs,
+                                           merged_threshold,
+                                           sum_of_cardinalities);
+    };
+
+    parition_split_bins();
+    partitions_merged_bins();
+
+    int64_t const difference =
+        static_cast<int64_t>(max_merged_size * relaxed_fpr_correction) - static_cast<int64_t>(max_split_size);
+
+    std::cout << "number_of_split_tbs:" << number_of_split_tbs << " difference:" << difference << std::endl;
+    std::cout << "Reconfiguring threshold.  from:" << split_threshold;
+
+    if (number_of_split_tbs == 0)
+        split_threshold = (split_threshold + max_merged_size) / 2; // increase threshold
+    else if (difference > 0)                                       // need more merged bins -> increase threshold
+        split_threshold =
+            static_cast<double>(split_threshold)
+            * ((static_cast<double>(max_merged_size) * relaxed_fpr_correction) / static_cast<double>(max_split_size));
+    else // need more split bins -> decrease threshold
+        split_threshold =
+            static_cast<double>(split_threshold)
+            * ((static_cast<double>(max_merged_size) * relaxed_fpr_correction) / static_cast<double>(max_split_size));
+
+    std::cout << " to:" << split_threshold << std::endl;
+
+    // reset result
+    partitions.clear();
+    partitions.resize(config.hibf_config.tmax);
+    idx = 0;
+    number_of_split_tbs = 0;
+    number_of_merged_tbs = config.hibf_config.tmax;
+    max_split_size = 0;
+    max_merged_size = 0;
+
+    parition_split_bins();
+    partitions_merged_bins();
+
+    // sanity check:
+    size_t sum{0};
+    size_t last_index{positions.size()}; // non existing user bin idx
+    for (auto const & p : partitions)
+    {
+        if (p.size() == 1)
+        {
+            // for split bins, avoid additional counts
+            if (p[0] != last_index)
+                ++sum;
+
+            last_index = p[0];
+        }
+        else
+        {
+            sum += p.size();
+        }
+    }
+
+    if (sum != positions.size())
+    {
+        std::string str{"Not all user bins have been assigned to the "};
+        str += std::to_string(partitions.size());
+        str += " partitions! (";
+        str += std::to_string(sum);
+        str += "/";
+        str += std::to_string(positions.size());
+        str += ")\n";
+        for (auto const & p : partitions)
+        {
+            str += "[";
+            for (auto const h : p)
+            {
+                str += std::to_string(h);
+                str += ",";
+            }
+            str.back() = ']';
+            str += '\n';
+        }
+
+        throw std::logic_error{str};
+    }
+
+    // assert([&](){ bool x{false}; for (auto const & p : partitions) { x &= !p.empty(); }; return x; });
+}
+
+seqan::hibf::layout::layout general_layout(chopper::configuration const & config,
+                                           std::vector<size_t> positions,
+                                           std::vector<size_t> const & cardinalities,
+                                           std::vector<seqan::hibf::sketch::hyperloglog> const & sketches)
+{
+    seqan::hibf::layout::layout hibf_layout;
+
+    seqan::hibf::concurrent_timer union_estimation_timer{};
+    seqan::hibf::concurrent_timer rearrangement_timer{};
+    seqan::hibf::concurrent_timer dp_algorithm_timer{};
+
+    dp_algorithm_timer.start();
+    hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config,
+                                                      cardinalities,
+                                                      sketches,
+                                                      std::move(positions),
+                                                      union_estimation_timer,
+                                                      rearrangement_timer);
+    dp_algorithm_timer.stop();
+
+    return hibf_layout;
+}
+
+bool do_I_need_a_fast_layout(chopper::configuration const & config,
+                             std::vector<size_t> const & positions,
+                             std::vector<size_t> const & cardinalities)
+{
+    // the fast layout heuristic would greedily merge even if merging only 2 bins at a time
+    // merging only little number of bins is highly disadvantegous for lower levels because few bins
+    // will be heavily split and this will raise the fpr correction for split bins
+    // Thus, if the average number of user bins per technical bin is less then 64, we should not fast layout
+    if (positions.size() < (64 * config.hibf_config.tmax))
+        return false;
+
+    if (positions.size() > 500'000) // layout takes more than half a day (should this be a user option?)
+        return true;
+
+    size_t largest_size{0};
+    size_t sum_of_cardinalities{0};
+
+    for (size_t const i : positions)
+    {
+        sum_of_cardinalities += cardinalities[i];
+        largest_size = std::max(largest_size, cardinalities[i]);
+    }
+
+    size_t const cardinality_per_tb = sum_of_cardinalities / config.hibf_config.tmax;
+
+    bool const largest_user_bin_might_be_split = largest_size > cardinality_per_tb;
+
+    // if no splitting is needed, its worth it to use a fast-merge-only algorithm
+    if (!largest_user_bin_might_be_split)
+        return true;
+
+    return false;
+}
+
+void add_level_to_layout(chopper::configuration const & config,
+                         seqan::hibf::layout::layout & hibf_layout,
+                         std::vector<std::vector<size_t>> const & partitions,
+                         std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                         std::vector<size_t> const & previous)
+{
+    size_t max_bin_id{0};
+    size_t max_size{0};
+
+    auto const split_fpr_correction =
+        seqan::hibf::layout::compute_fpr_correction({.fpr = config.hibf_config.maximum_fpr, //
+                                                     .hash_count = config.hibf_config.number_of_hash_functions,
+                                                     .t_max = partitions.size()});
+
+    double const relaxed_fpr_correction = seqan::hibf::layout::compute_relaxed_fpr_correction(
+        {.fpr = config.hibf_config.maximum_fpr, //
+         .relaxed_fpr = config.hibf_config.relaxed_fpr,
+         .hash_count = config.hibf_config.number_of_hash_functions});
+
+    // we assume here that the user bins have been sorted by user bin id such that pos = idx
+    for (size_t partition_idx{0}; partition_idx < partitions.size(); ++partition_idx)
+    {
+        auto const & partition = partitions[partition_idx];
+
+        if (partition.size() > 1) // merged bin
+        {
+            seqan::hibf::sketch::hyperloglog current_sketch{sketches[0]}; // ensure same bit size
+            current_sketch.reset();
+
+            for (size_t const user_bin_id : partition)
+            {
+                assert(hibf_layout.user_bins[user_bin_id].idx == user_bin_id);
+                auto & current_user_bin = hibf_layout.user_bins[user_bin_id];
+
+                // update
+                assert(previous == current_user_bin.previous_TB_indices);
+                current_user_bin.previous_TB_indices.push_back(partition_idx);
+                current_sketch.merge(sketches[user_bin_id]);
+            }
+
+            // update max_bin_id, max_size
+            size_t const current_size = current_sketch.estimate() * relaxed_fpr_correction;
+            if (current_size > max_size)
+            {
+                max_bin_id = partition_idx;
+                max_size = current_size;
+            }
+        }
+        else if (partition.size() == 0) // should not happen.. dge case?
+        {
+            continue;
+        }
+        else // single or split bin (partition.size() == 1)
+        {
+            auto & current_user_bin = hibf_layout.user_bins[partitions[partition_idx][0]];
+            assert(current_user_bin.idx == partitions[partition_idx][0]);
+            current_user_bin.storage_TB_id = partition_idx;
+            current_user_bin.number_of_technical_bins = 1; // initialise to 1
+
+            while (partition_idx + 1 < partitions.size() && partitions[partition_idx].size() == 1
+                   && partitions[partition_idx + 1].size() == 1
+                   && partitions[partition_idx][0] == partitions[partition_idx + 1][0])
+            {
+                ++current_user_bin.number_of_technical_bins;
+                ++partition_idx;
+            }
+
+            // update max_bin_id, max_size
+            size_t const current_size = sketches[current_user_bin.idx].estimate()
+                                      * split_fpr_correction[current_user_bin.number_of_technical_bins];
+            if (current_size > max_size)
+            {
+                max_bin_id = current_user_bin.storage_TB_id;
+                max_size = current_size;
+            }
+        }
+    }
+
+    hibf_layout.max_bins.emplace_back(previous, max_bin_id); // add lower level meta information
+}
+
+void update_layout_from_child_layout(seqan::hibf::layout::layout & child_layout,
+                                     seqan::hibf::layout::layout & hibf_layout,
+                                     std::vector<size_t> const & new_previous)
+{
+    hibf_layout.max_bins.emplace_back(new_previous, child_layout.top_level_max_bin_id);
+
+    for (auto & max_bin : child_layout.max_bins)
+    {
+        max_bin.previous_TB_indices.insert(max_bin.previous_TB_indices.begin(),
+                                           new_previous.begin(),
+                                           new_previous.end());
+        hibf_layout.max_bins.push_back(max_bin);
+    }
+
+    for (auto const & user_bin : child_layout.user_bins)
+    {
+        auto & actual_user_bin = hibf_layout.user_bins[user_bin.idx];
+
+        actual_user_bin.previous_TB_indices.insert(actual_user_bin.previous_TB_indices.end(),
+                                                   user_bin.previous_TB_indices.begin(),
+                                                   user_bin.previous_TB_indices.end());
+        actual_user_bin.number_of_technical_bins = user_bin.number_of_technical_bins;
+        actual_user_bin.storage_TB_id = user_bin.storage_TB_id;
+    }
+}
+
+void fast_layout_recursion(chopper::configuration const & config,
+                           std::vector<size_t> const & positions,
+                           std::vector<size_t> const & cardinalities,
+                           std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                           std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                           seqan::hibf::layout::layout & hibf_layout,
+                           std::vector<size_t> const & previous)
+{
+    std::vector<std::vector<size_t>> tmax_partitions(config.hibf_config.tmax);
+
+    // here we assume that we want to start with a fast layout
+    partition_user_bins(config, positions, cardinalities, sketches, minHash_sketches, tmax_partitions);
+
+#pragma omp critical
+    {
+        add_level_to_layout(config, hibf_layout, tmax_partitions, sketches, previous);
+    }
+
+    for (size_t partition_idx = 0; partition_idx < tmax_partitions.size(); ++partition_idx)
+    {
+        auto const & partition = tmax_partitions[partition_idx];
+        auto const new_previous = [&]()
+        {
+            auto cpy{previous};
+            cpy.push_back(partition_idx);
+            return cpy;
+        }();
+
+        if (partition.empty() || partition.size() == 1) // nothing to merge
+            continue;
+
+        if (do_I_need_a_fast_layout(config, partition, cardinalities))
+        {
+            fast_layout_recursion(config,
+                                  partition,
+                                  cardinalities,
+                                  sketches,
+                                  minHash_sketches,
+                                  hibf_layout,
+                                  new_previous); // recurse fast_layout
+        }
+        else
+        {
+            auto child_layout = general_layout(config, partition, cardinalities, sketches);
+
+#pragma omp critical
+            {
+                update_layout_from_child_layout(child_layout, hibf_layout, new_previous);
+            }
+        }
+    }
+}
+
+void fast_layout(chopper::configuration const & config,
+                 std::vector<size_t> const & positions,
+                 std::vector<size_t> const & cardinalities,
+                 std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+                 std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches,
+                 seqan::hibf::layout::layout & hibf_layout)
+{
+    auto config_copy = config;
+
+    std::vector<std::vector<size_t>> tmax_partitions(config.hibf_config.tmax);
+
+    // here we assume that we want to start with a fast layout
+    config.intital_partition_timer.start();
+    partition_user_bins(config_copy, positions, cardinalities, sketches, minHash_sketches, tmax_partitions);
+    config.intital_partition_timer.stop();
+
+    auto const split_fpr_correction =
+        seqan::hibf::layout::compute_fpr_correction({.fpr = config.hibf_config.maximum_fpr, //
+                                                     .hash_count = config.hibf_config.number_of_hash_functions,
+                                                     .t_max = config.hibf_config.tmax});
+
+    double const relaxed_fpr_correction = seqan::hibf::layout::compute_relaxed_fpr_correction(
+        {.fpr = config.hibf_config.maximum_fpr, //
+         .relaxed_fpr = config.hibf_config.relaxed_fpr,
+         .hash_count = config.hibf_config.number_of_hash_functions});
+
+    size_t max_bin_id{0};
+    size_t max_size{0};
+    hibf_layout.user_bins.resize(config_copy.hibf_config.number_of_user_bins);
+
+    // initialise user bins in layout
+    for (size_t partition_idx = 0; partition_idx < tmax_partitions.size(); ++partition_idx)
+    {
+        if (tmax_partitions[partition_idx].size() > 1) // merged bin
+        {
+            seqan::hibf::sketch::hyperloglog current_sketch{sketches[0]}; // ensure same bit size
+            current_sketch.reset();
+
+            for (size_t const user_bin_id : tmax_partitions[partition_idx])
+            {
+                hibf_layout.user_bins[user_bin_id] = {.previous_TB_indices = {partition_idx},
+                                                      .storage_TB_id = 0 /*not determiend yet*/,
+                                                      .number_of_technical_bins = 1 /*not determiend yet*/,
+                                                      .idx = user_bin_id};
+                current_sketch.merge(sketches[user_bin_id]);
+            }
+
+            // update max_bin_id, max_size
+            size_t const current_size = current_sketch.estimate() * relaxed_fpr_correction;
+            if (current_size > max_size)
+            {
+                max_bin_id = partition_idx;
+                max_size = current_size;
+            }
+        }
+        else if (tmax_partitions[partition_idx].size() == 0) // should not happen. Edge case?
+        {
+            continue;
+        }
+        else // single or split bin (tmax_partitions[partition_idx].size() == 1)
+        {
+            assert(tmax_partitions[partition_idx].size() == 1);
+            size_t const user_bin_id = tmax_partitions[partition_idx][0];
+            hibf_layout.user_bins[user_bin_id] = {.previous_TB_indices = {},
+                                                  .storage_TB_id = partition_idx,
+                                                  .number_of_technical_bins = 1 /*determiend below*/,
+                                                  .idx = user_bin_id};
+
+            while (partition_idx + 1 < tmax_partitions.size() && tmax_partitions[partition_idx].size() == 1
+                   && tmax_partitions[partition_idx + 1].size() == 1
+                   && tmax_partitions[partition_idx][0] == tmax_partitions[partition_idx + 1][0])
+            {
+                ++hibf_layout.user_bins[user_bin_id].number_of_technical_bins;
+                ++partition_idx;
+            }
+
+            // update max_bin_id, max_size
+            size_t const current_size =
+                sketches[user_bin_id].estimate()
+                * split_fpr_correction[hibf_layout.user_bins[user_bin_id].number_of_technical_bins];
+            if (current_size > max_size)
+            {
+                max_bin_id = hibf_layout.user_bins[user_bin_id].storage_TB_id;
+                max_size = current_size;
+            }
+        }
+    }
+
+    hibf_layout.top_level_max_bin_id = max_bin_id;
+
+    config.small_layouts_timer.start();
+#pragma omp parallel
+#pragma omp single
+    {
+#pragma omp taskloop
+        for (size_t partition_idx = 0; partition_idx < tmax_partitions.size(); ++partition_idx)
+        {
+            auto const & partition = tmax_partitions[partition_idx];
+
+            if (partition.empty() || partition.size() == 1) // nothing to merge
+                continue;
+
+            if (do_I_need_a_fast_layout(config_copy, partition, cardinalities))
+            {
+                fast_layout_recursion(config_copy,
+                                      partition,
+                                      cardinalities,
+                                      sketches,
+                                      minHash_sketches,
+                                      hibf_layout,
+                                      {partition_idx}); // recurse fast_layout
+            }
+            else
+            {
+                auto small_layout = general_layout(config, partition, cardinalities, sketches);
+
+#pragma omp critical
+                {
+                    update_layout_from_child_layout(small_layout, hibf_layout, std::vector<size_t>{partition_idx});
+                }
+            }
+        }
+    }
+    config.small_layouts_timer.stop();
+}
+
 int execute(chopper::configuration & config,
             std::vector<std::vector<std::string>> const & filenames,
-            std::vector<seqan::hibf::sketch::hyperloglog> const & sketches)
+            std::vector<seqan::hibf::sketch::hyperloglog> const & sketches,
+            std::vector<seqan::hibf::sketch::minhashes> const & minHash_sketches)
 {
     config.hibf_config.validate_and_set_defaults();
 
-    seqan::hibf::layout::layout hibf_layout;
-    std::vector<size_t> kmer_counts;
-    seqan::hibf::sketch::estimate_kmer_counts(sketches, kmer_counts);
+    std::vector<size_t> cardinalities;
+    seqan::hibf::sketch::estimate_kmer_counts(sketches, cardinalities);
 
-    if (config.determine_best_tmax)
+    if (true) // 0 == unset == single HIBF, 1 == single HIBF
     {
-        hibf_layout = determine_best_number_of_technical_bins(config, kmer_counts, sketches);
+        seqan::hibf::layout::layout hibf_layout;
+
+        if (config.determine_best_tmax)
+        {
+            hibf_layout = determine_best_number_of_technical_bins(config, cardinalities, sketches);
+        }
+        else
+        {
+            config.dp_algorithm_timer.start();
+            fast_layout(config,
+                        seqan::hibf::iota_vector(sketches.size()),
+                        cardinalities,
+                        sketches,
+                        minHash_sketches,
+                        hibf_layout);
+            config.dp_algorithm_timer.stop();
+
+            // hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config,
+            //                                                   cardinalities,
+            //                                                   sketches,
+            //                                                   seqan::hibf::iota_vector(sketches.size()),
+            //                                                   config.union_estimation_timer,
+            //                                                   config.rearrangement_timer);
+
+            // sort records ascending by the number of bin indices (corresponds to the IBF levels)
+            // GCOVR_EXCL_START
+            std::ranges::sort(hibf_layout.max_bins,
+                              [](auto const & r, auto const & l)
+                              {
+                                  if (r.previous_TB_indices.size() == l.previous_TB_indices.size())
+                                      return std::ranges::lexicographical_compare(r.previous_TB_indices,
+                                                                                  l.previous_TB_indices);
+                                  else
+                                      return r.previous_TB_indices.size() < l.previous_TB_indices.size();
+                              });
+            // GCOVR_EXCL_STOP
+
+            if (config.output_verbose_statistics)
+            {
+                size_t dummy{};
+                chopper::layout::hibf_statistics global_stats{config, sketches, cardinalities};
+                global_stats.hibf_layout = hibf_layout;
+                global_stats.print_header_to(std::cout);
+                global_stats.print_summary_to(dummy, std::cout);
+            }
+        }
+
+        // brief Write the output to the layout file.
+        std::ofstream fout{config.output_filename};
+        chopper::layout::write_user_bins_to(filenames, fout);
+        config.write_to(fout);
+        hibf_layout.write_to(fout);
     }
     else
     {
-        config.dp_algorithm_timer.start();
-        hibf_layout = seqan::hibf::layout::compute_layout(config.hibf_config,
-                                                          kmer_counts,
-                                                          sketches,
-                                                          seqan::hibf::iota_vector(sketches.size()),
-                                                          config.union_estimation_timer,
-                                                          config.rearrangement_timer);
-        config.dp_algorithm_timer.stop();
+        std::vector<std::vector<size_t>> positions(config.hibf_config.tmax); // asign positions for each partition
+
+        std::vector<size_t> ps;
+        ps.resize(cardinalities.size());
+        std::iota(ps.begin(), ps.end(), 0);
+        partition_user_bins(config, ps, cardinalities, sketches, minHash_sketches, positions);
+
+        std::vector<seqan::hibf::layout::layout> hibf_layouts(config.hibf_config.tmax); // multiple layouts
 
-        if (config.output_verbose_statistics)
+#pragma omp parallel for schedule(dynamic) num_threads(config.hibf_config.threads)
+        for (size_t i = 0; i < config.hibf_config.tmax; ++i)
         {
-            size_t dummy{};
-            chopper::layout::hibf_statistics global_stats{config, sketches, kmer_counts};
-            global_stats.hibf_layout = hibf_layout;
-            global_stats.print_header_to(std::cout);
-            global_stats.print_summary_to(dummy, std::cout);
+            // reset tmax to fit number of user bins in layout
+            auto local_hibf_config = config.hibf_config; // every thread needs to set individual tmax
+            local_hibf_config.tmax =
+                chopper::next_multiple_of_64(static_cast<uint16_t>(std::ceil(std::sqrt(positions[i].size()))));
+
+            config.dp_algorithm_timer.start();
+            hibf_layouts[i] = seqan::hibf::layout::compute_layout(local_hibf_config,
+                                                                  cardinalities,
+                                                                  sketches,
+                                                                  std::move(positions[i]),
+                                                                  config.union_estimation_timer,
+                                                                  config.rearrangement_timer);
+            config.dp_algorithm_timer.stop();
         }
-    }
 
-    // brief Write the output to the layout file.
-    std::ofstream fout{config.output_filename};
-    chopper::layout::write_user_bins_to(filenames, fout);
-    config.write_to(fout);
-    hibf_layout.write_to(fout);
+        // brief Write the output to the layout file.
+        std::ofstream fout{config.output_filename};
+        chopper::layout::write_user_bins_to(filenames, fout);
+        config.write_to(fout);
+
+        for (size_t i = 0; i < config.hibf_config.tmax; ++i)
+            hibf_layouts[i].write_to(fout);
+    }
 
     return 0;
 }
diff --git a/src/layout/hibf_statistics.cpp b/src/layout/hibf_statistics.cpp
index df86775c..d764b394 100644
--- a/src/layout/hibf_statistics.cpp
+++ b/src/layout/hibf_statistics.cpp
@@ -398,7 +398,9 @@ void hibf_statistics::collect_bins()
 #    pragma GCC diagnostic ignored "-Warray-bounds="
 #    pragma GCC diagnostic ignored "-Wstringop-overflow="
 #endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMMOV
-                ibf.bins.emplace_back(hibf_statistics::bin_kind::merged, 1, std::vector<size_t>{user_bin_info.idx});
+
+                // ibf.bins.emplace_back(hibf_statistics::bin_kind::merged, 1, std::vector<size_t>{user_bin_info.idx});
+
 #if CHOPPER_WORKAROUND_GCC_BOGUS_MEMMOV
 #    pragma GCC diagnostic pop
 #endif // CHOPPER_WORKAROUND_GCC_BOGUS_MEMMOV
diff --git a/test/api/CMakeLists.txt b/test/api/CMakeLists.txt
index 050affcf..3c46f8a4 100644
--- a/test/api/CMakeLists.txt
+++ b/test/api/CMakeLists.txt
@@ -9,5 +9,6 @@ include (add_subdirectories)
 
 add_api_test (config_test.cpp)
 add_api_test (input_functor_test.cpp)
+add_api_test (lsh_test.cpp)
 
 add_subdirectories ()
diff --git a/test/api/layout/CMakeLists.txt b/test/api/layout/CMakeLists.txt
index 9cdb4076..45f5b1ce 100644
--- a/test/api/layout/CMakeLists.txt
+++ b/test/api/layout/CMakeLists.txt
@@ -9,6 +9,7 @@ cmake_minimum_required (VERSION 3.18)
 
 add_api_test (ibf_query_cost_test.cpp)
 add_api_test (execute_layout_test.cpp)
+add_api_test (fast_layout_test.cpp)
 
 add_api_test (execute_with_estimation_test.cpp)
 target_use_datasources (execute_with_estimation_test FILES seq1.fa)
@@ -26,3 +27,4 @@ if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU"
 endif ()
 add_api_test (user_bin_io_test.cpp)
 add_api_test (input_test.cpp)
+add_api_test (determine_split_bins_test.cpp)
diff --git a/test/api/layout/determine_split_bins_test.cpp b/test/api/layout/determine_split_bins_test.cpp
new file mode 100644
index 00000000..4b0c49d2
--- /dev/null
+++ b/test/api/layout/determine_split_bins_test.cpp
@@ -0,0 +1,193 @@
+#include <gtest/gtest.h> // for Test, TestInfo, EXPECT_EQ, Message, TEST, TestPartResult
+
+#include <iostream> // for operator<<, char_traits, basic_ostream, basic_stringstream, strings...
+#include <numeric>  // for allocator, string
+#include <vector>   // for vector
+
+#include <chopper/configuration.hpp>
+
+#include <hibf/layout/compute_fpr_correction.hpp>
+#include <hibf/layout/print_matrix.hpp> // for data_store
+#include <hibf/misc/divide_and_ceil.hpp>
+
+std::pair<size_t, size_t> determine_split_bins(chopper::configuration const & config,
+                                               std::vector<size_t> const & positions,
+                                               std::vector<size_t> const & cardinalities,
+                                               size_t const num_technical_bins,
+                                               size_t const num_user_bins,
+                                               std::vector<std::vector<size_t>> & partitions)
+{
+    if (num_user_bins == 0)
+        return {0, 0};
+
+    assert(num_technical_bins > 0u);
+    assert(num_user_bins > 0u);
+    assert(num_user_bins <= num_technical_bins);
+
+    auto const fpr_correction =
+        seqan::hibf::layout::compute_fpr_correction({.fpr = config.hibf_config.maximum_fpr, //
+                                                     .hash_count = config.hibf_config.number_of_hash_functions,
+                                                     .t_max = num_technical_bins});
+
+    std::vector<std::vector<size_t>> matrix(num_technical_bins); // rows
+    for (auto & v : matrix)
+        v.resize(num_user_bins, std::numeric_limits<size_t>::max()); // columns
+
+    std::vector<std::vector<size_t>> trace(num_technical_bins); // rows
+    for (auto & v : trace)
+        v.resize(num_user_bins, std::numeric_limits<size_t>::max()); // columns
+
+    size_t const extra_bins = num_technical_bins - num_user_bins + 1;
+
+    // initialize first column (first row is initialized with inf)
+    double const ub_cardinality = static_cast<double>(cardinalities[positions[0]]);
+    for (size_t i = 0; i < extra_bins; ++i)
+    {
+        size_t const corrected_ub_cardinality = static_cast<size_t>(ub_cardinality * fpr_correction[i + 1]);
+        matrix[i][0] = seqan::hibf::divide_and_ceil(corrected_ub_cardinality, i + 1u);
+    }
+
+    // we must iterate column wise
+    for (size_t j = 1; j < num_user_bins; ++j)
+    {
+        double const ub_cardinality = static_cast<double>(cardinalities[positions[j]]);
+
+        for (size_t i = j; i < j + extra_bins; ++i)
+        {
+            size_t minimum{std::numeric_limits<size_t>::max()};
+
+            for (size_t i_prime = j - 1; i_prime < i; ++i_prime)
+            {
+                size_t const corrected_ub_cardinality =
+                    static_cast<size_t>(ub_cardinality * fpr_correction[(i - i_prime)]);
+                size_t score = std::max<size_t>(seqan::hibf::divide_and_ceil(corrected_ub_cardinality, i - i_prime),
+                                                matrix[i_prime][j - 1]);
+
+                // std::cout << "j:" << j << " i:" << i << " i':" << i_prime << " score:" << score << std::endl;
+
+                minimum = (score < minimum) ? (trace[i][j] = i_prime, score) : minimum;
+            }
+
+            matrix[i][j] = minimum;
+        }
+    }
+
+    // seqan::hibf::layout::print_matrix(matrix, num_technical_bins, num_user_bins, std::numeric_limits<size_t>::max());
+    //seqan::hibf::layout::print_matrix(trace, num_technical_bins, num_user_bins, std::numeric_limits<size_t>::max());
+
+    // backtracking
+    // first, in the last column, find the row with minimum score (it can happen that the last rows are equally good)
+    // the less rows, the better
+    size_t trace_i{num_technical_bins - 1};
+    size_t best_score{std::numeric_limits<size_t>::max()};
+    for (size_t best_i{0}; best_i < num_technical_bins; ++best_i)
+    {
+        if (matrix[best_i][num_user_bins - 1] < best_score)
+        {
+            best_score = matrix[best_i][num_user_bins - 1];
+            trace_i = best_i;
+        }
+    }
+    size_t const number_of_split_tbs{trace_i + 1}; // trace_i is the position. So +1 for the number
+
+    // now that we found the best trace_i start usual backtracking
+    size_t trace_j = num_user_bins - 1;
+
+    // size_t max_id{};
+    size_t max_size{};
+
+    size_t bin_id{};
+
+    while (trace_j > 0)
+    {
+        size_t next_i = trace[trace_i][trace_j];
+        size_t const number_of_bins = (trace_i - next_i);
+        size_t const cardinality = cardinalities[positions[trace_j]];
+        size_t const corrected_cardinality = static_cast<size_t>(cardinality * fpr_correction[number_of_bins]);
+        size_t const cardinality_per_bin = seqan::hibf::divide_and_ceil(corrected_cardinality, number_of_bins);
+
+        if (cardinality_per_bin > max_size)
+        {
+            // max_id = bin_id;
+            max_size = cardinality_per_bin;
+        }
+
+        for (size_t splits{0}; splits < number_of_bins; ++splits)
+        {
+            partitions[partitions.size() - 1 - bin_id].push_back(positions[trace_j]);
+            ++bin_id;
+        }
+
+        trace_i = trace[trace_i][trace_j];
+        --trace_j;
+    }
+    ++trace_i; // because we want the length not the index. Now trace_i == number_of_bins
+    size_t const cardinality = cardinalities[positions[0]];
+    size_t const corrected_cardinality = static_cast<size_t>(cardinality * fpr_correction[trace_i]);
+    // NOLINTNEXTLINE(clang-analyzer-core.DivideZero)
+    size_t const cardinality_per_bin = seqan::hibf::divide_and_ceil(corrected_cardinality, trace_i);
+
+    if (cardinality_per_bin > max_size)
+    {
+        // max_id = bin_id;
+        max_size = cardinality_per_bin;
+    }
+
+    for (size_t splits{0}; splits < trace_i; ++splits)
+    {
+        partitions[partitions.size() - 1 - bin_id].push_back(positions[0]);
+        ++bin_id;
+    }
+
+    return {number_of_split_tbs, max_size};
+}
+
+TEST(simple_split, first)
+{
+    chopper::configuration config{};
+
+    std::vector<size_t> cardinalities{};
+    for (size_t i{0}; i < 20; ++i)
+        cardinalities.push_back(2400);
+    cardinalities.push_back(200);
+    for (size_t i{0}; i < 84; ++i)
+        cardinalities.push_back(50);
+
+    std::vector<size_t> positions(cardinalities.size());
+    std::iota(positions.begin(), positions.end(), 0u);
+
+    size_t num_technical_bins{63};
+    size_t num_user_bins{21};
+
+    std::vector<std::vector<size_t>> partitions(64);
+
+    auto const [num_splits, max_size] =
+        determine_split_bins(config, positions, cardinalities, num_technical_bins, num_user_bins, partitions);
+
+    EXPECT_EQ(num_splits, 61);
+    EXPECT_EQ(max_size, 1452);
+
+    size_t ub_idx{20};
+    size_t tb_idx{partitions.size() - 1};
+
+    // TBs:  0, 1, 2, 3, ...
+    // UBs: 20,19,19,19,18,18,18,17,17,17,16,16,16,15,15,15,14,14,14,13,13,13,12,12,12,11,11,11,10,10,10,9,9,9,8,8,8,7,7,7,6,6,6,5,5,5,4,4,4,3,3,3,2,2,2,1,1,1,0,0,0,
+
+    EXPECT_EQ(partitions[tb_idx][0], ub_idx); // single bin with card = 200
+    --tb_idx;
+    --ub_idx;
+
+    for (size_t i{partitions.size() - 1}; i > partitions.size() - num_splits - 1; --i)
+        std::cerr << partitions[i][0] << ",";
+
+    for (; tb_idx > partitions.size() - num_splits - 1; --tb_idx)
+    {
+        for (size_t three = 0; three < 3; ++three)
+        {
+            EXPECT_EQ(partitions[tb_idx][0], ub_idx);
+            --tb_idx;
+        }
+        ++tb_idx; // one too much in loop before
+        --ub_idx;
+    }
+}
\ No newline at end of file
diff --git a/test/api/layout/execute_layout_test.cpp b/test/api/layout/execute_layout_test.cpp
index 5449ec12..d77a22f7 100644
--- a/test/api/layout/execute_layout_test.cpp
+++ b/test/api/layout/execute_layout_test.cpp
@@ -46,9 +46,10 @@ TEST(execute_test, few_ubs)
         filenames{{"seq0a", "seq0b"}, {"seq1"}, {"seq2"}, {"seq3"}, {"seq4"}, {"seq5"}, {"seq6"}, {"seq7"}};
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, filenames, sketches);
+    chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     std::string const expected_file{"@CHOPPER_USER_BINS\n"
                                     "@0 seq0a seq0b\n"
@@ -142,9 +143,10 @@ TEST(execute_test, set_default_tmax)
         filenames{{"seq0"}, {"seq1"}, {"seq2"}, {"seq3"}, {"seq4"}, {"seq5"}, {"seq6"}, {"seq7"}};
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, filenames, sketches);
+    chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     EXPECT_EQ(config.hibf_config.tmax, 64u);
 }
@@ -178,9 +180,10 @@ TEST(execute_test, many_ubs)
     config.hibf_config.disable_estimate_union = true; // also disables rearrangement
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, many_filenames, sketches);
+    chopper::layout::execute(config, many_filenames, sketches, minHash_sketches);
 
     std::string const expected_file{"@CHOPPER_USER_BINS\n"
                                     "@0 seq0\n"
diff --git a/test/api/layout/execute_with_estimation_test.cpp b/test/api/layout/execute_with_estimation_test.cpp
index c33d0243..9951befe 100644
--- a/test/api/layout/execute_with_estimation_test.cpp
+++ b/test/api/layout/execute_with_estimation_test.cpp
@@ -53,9 +53,10 @@ TEST(execute_estimation_test, few_ubs)
         filenames{{"seq0"}, {"seq1"}, {"seq2"}, {"seq3"}, {"seq4"}, {"seq5"}, {"seq6"}, {"seq7"}};
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, filenames, sketches);
+    chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     ASSERT_TRUE(std::filesystem::exists(stats_file));
 
@@ -114,9 +115,10 @@ TEST(execute_estimation_test, many_ubs)
     config.hibf_config.disable_estimate_union = true; // also disables rearrangement
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, many_filenames, sketches);
+    chopper::layout::execute(config, many_filenames, sketches, minHash_sketches);
 
     ASSERT_TRUE(std::filesystem::exists(stats_file));
 
@@ -421,9 +423,10 @@ TEST(execute_estimation_test, many_ubs_force_all)
     config.hibf_config.disable_estimate_union = true; // also disables rearrangement
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, many_filenames, sketches);
+    chopper::layout::execute(config, many_filenames, sketches, minHash_sketches);
 
     ASSERT_TRUE(std::filesystem::exists(stats_file));
 
@@ -523,9 +526,10 @@ TEST(execute_estimation_test, with_rearrangement)
     config.hibf_config.number_of_user_bins = filenames.size();
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, filenames, sketches);
+    chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     ASSERT_TRUE(std::filesystem::exists(stats_file));
 
diff --git a/test/api/layout/fast_layout_test.cpp b/test/api/layout/fast_layout_test.cpp
new file mode 100644
index 00000000..9f175672
--- /dev/null
+++ b/test/api/layout/fast_layout_test.cpp
@@ -0,0 +1,422 @@
+// ---------------------------------------------------------------------------------------------------
+// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
+// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
+// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
+// shipped with this file and also available at: https://github.com/seqan/chopper/blob/main/LICENSE.md
+// ---------------------------------------------------------------------------------------------------
+
+#include <gtest/gtest.h>
+
+#include <cstddef>
+#include <filesystem>
+#include <fstream>
+#include <functional>
+#include <ranges>
+#include <string>
+#include <vector>
+
+#include <chopper/layout/execute.hpp>
+
+#include <hibf/sketch/compute_sketches.hpp>
+
+#include "../api_test.hpp"
+#include "print_debug_file.hpp"
+
+TEST(execute_test, many_ubs)
+{
+    seqan3::test::tmp_directory tmp_dir{};
+    std::filesystem::path const layout_file{tmp_dir.path() / "layout.tsv"};
+
+    std::vector<std::vector<std::string>> many_filenames;
+
+    for (size_t i{0}; i < 96u; ++i)
+        many_filenames.push_back({seqan3::detail::to_string("seq", i)});
+
+    // Creates sizes of the following series
+    // [100,101,...,120,222,223,...,241,343,344,...362,464,465,...,483,585,586,...,600]
+    // See also https://godbolt.org/z/9517eaaaG
+    auto simulated_input = [&](size_t const num, seqan::hibf::insert_iterator it)
+    {
+        size_t const desired_kmer_count = 1001 * ((num + 20) / 20) + num;
+        for (auto hash : std::views::iota(num * 600, num * 600 + desired_kmer_count))
+            it = hash;
+    };
+
+    chopper::configuration config{};
+    config.output_filename = layout_file;
+    config.disable_sketch_output = true;
+    config.hibf_config.tmax = 64;
+    config.hibf_config.input_fn = simulated_input;
+    config.hibf_config.number_of_user_bins = many_filenames.size();
+    config.hibf_config.disable_estimate_union = true; // also disables rearrangement
+
+    std::vector<seqan::hibf::sketch::hyperloglog> sketches;
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
+
+    chopper::layout::execute(config, many_filenames, sketches, minHash_sketches);
+
+    std::string const expected_file{"@CHOPPER_USER_BINS\n"
+                                    "@0 seq0\n"
+                                    "@1 seq1\n"
+                                    "@2 seq2\n"
+                                    "@3 seq3\n"
+                                    "@4 seq4\n"
+                                    "@5 seq5\n"
+                                    "@6 seq6\n"
+                                    "@7 seq7\n"
+                                    "@8 seq8\n"
+                                    "@9 seq9\n"
+                                    "@10 seq10\n"
+                                    "@11 seq11\n"
+                                    "@12 seq12\n"
+                                    "@13 seq13\n"
+                                    "@14 seq14\n"
+                                    "@15 seq15\n"
+                                    "@16 seq16\n"
+                                    "@17 seq17\n"
+                                    "@18 seq18\n"
+                                    "@19 seq19\n"
+                                    "@20 seq20\n"
+                                    "@21 seq21\n"
+                                    "@22 seq22\n"
+                                    "@23 seq23\n"
+                                    "@24 seq24\n"
+                                    "@25 seq25\n"
+                                    "@26 seq26\n"
+                                    "@27 seq27\n"
+                                    "@28 seq28\n"
+                                    "@29 seq29\n"
+                                    "@30 seq30\n"
+                                    "@31 seq31\n"
+                                    "@32 seq32\n"
+                                    "@33 seq33\n"
+                                    "@34 seq34\n"
+                                    "@35 seq35\n"
+                                    "@36 seq36\n"
+                                    "@37 seq37\n"
+                                    "@38 seq38\n"
+                                    "@39 seq39\n"
+                                    "@40 seq40\n"
+                                    "@41 seq41\n"
+                                    "@42 seq42\n"
+                                    "@43 seq43\n"
+                                    "@44 seq44\n"
+                                    "@45 seq45\n"
+                                    "@46 seq46\n"
+                                    "@47 seq47\n"
+                                    "@48 seq48\n"
+                                    "@49 seq49\n"
+                                    "@50 seq50\n"
+                                    "@51 seq51\n"
+                                    "@52 seq52\n"
+                                    "@53 seq53\n"
+                                    "@54 seq54\n"
+                                    "@55 seq55\n"
+                                    "@56 seq56\n"
+                                    "@57 seq57\n"
+                                    "@58 seq58\n"
+                                    "@59 seq59\n"
+                                    "@60 seq60\n"
+                                    "@61 seq61\n"
+                                    "@62 seq62\n"
+                                    "@63 seq63\n"
+                                    "@64 seq64\n"
+                                    "@65 seq65\n"
+                                    "@66 seq66\n"
+                                    "@67 seq67\n"
+                                    "@68 seq68\n"
+                                    "@69 seq69\n"
+                                    "@70 seq70\n"
+                                    "@71 seq71\n"
+                                    "@72 seq72\n"
+                                    "@73 seq73\n"
+                                    "@74 seq74\n"
+                                    "@75 seq75\n"
+                                    "@76 seq76\n"
+                                    "@77 seq77\n"
+                                    "@78 seq78\n"
+                                    "@79 seq79\n"
+                                    "@80 seq80\n"
+                                    "@81 seq81\n"
+                                    "@82 seq82\n"
+                                    "@83 seq83\n"
+                                    "@84 seq84\n"
+                                    "@85 seq85\n"
+                                    "@86 seq86\n"
+                                    "@87 seq87\n"
+                                    "@88 seq88\n"
+                                    "@89 seq89\n"
+                                    "@90 seq90\n"
+                                    "@91 seq91\n"
+                                    "@92 seq92\n"
+                                    "@93 seq93\n"
+                                    "@94 seq94\n"
+                                    "@95 seq95\n"
+                                    "@CHOPPER_USER_BINS_END\n"
+                                    "@CHOPPER_CONFIG\n"
+                                    "@{\n"
+                                    "@    \"chopper_config\": {\n"
+                                    "@        \"version\": 2,\n"
+                                    "@        \"data_file\": {\n"
+                                    "@            \"value0\": \"\"\n"
+                                    "@        },\n"
+                                    "@        \"debug\": false,\n"
+                                    "@        \"sketch_directory\": {\n"
+                                    "@            \"value0\": \"\"\n"
+                                    "@        },\n"
+                                    "@        \"k\": 19,\n"
+                                    "@        \"window_size\": 19,\n"
+                                    "@        \"disable_sketch_output\": true,\n"
+                                    "@        \"precomputed_files\": false,\n"
+                                    "@        \"output_filename\": {\n"
+                                    "@            \"value0\": \""
+                                    + layout_file.string()
+                                    + "\"\n"
+                                      "@        },\n"
+                                      "@        \"determine_best_tmax\": false,\n"
+                                      "@        \"force_all_binnings\": false\n"
+                                      "@    }\n"
+                                      "@}\n"
+                                      "@CHOPPER_CONFIG_END\n"
+                                      "@HIBF_CONFIG\n"
+                                      "@{\n"
+                                      "@    \"hibf_config\": {\n"
+                                      "@        \"version\": 2,\n"
+                                      "@        \"number_of_user_bins\": 96,\n"
+                                      "@        \"number_of_hash_functions\": 2,\n"
+                                      "@        \"maximum_fpr\": 0.05,\n"
+                                      "@        \"relaxed_fpr\": 0.3,\n"
+                                      "@        \"threads\": 1,\n"
+                                      "@        \"sketch_bits\": 12,\n"
+                                      "@        \"tmax\": 64,\n"
+                                      "@        \"empty_bin_fraction\": 0.0,\n"
+                                      "@        \"alpha\": 1.2,\n"
+                                      "@        \"max_rearrangement_ratio\": 0.5,\n"
+                                      "@        \"disable_estimate_union\": true,\n"
+                                      "@        \"disable_rearrangement\": true\n"
+                                      "@    }\n"
+                                      "@}\n"
+                                      "@HIBF_CONFIG_END\n"
+#ifdef _LIBCPP_VERSION
+                                      "#TOP_LEVEL_IBF fullest_technical_bin_idx:23\n"
+                                      "#LOWER_LEVEL_IBF_1 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_2 fullest_technical_bin_idx:28\n"
+                                      "#LOWER_LEVEL_IBF_3 fullest_technical_bin_idx:6\n"
+                                      "#LOWER_LEVEL_IBF_4 fullest_technical_bin_idx:15\n"
+                                      "#LOWER_LEVEL_IBF_5 fullest_technical_bin_idx:0\n"
+                                      "#LOWER_LEVEL_IBF_7 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_8 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_10 fullest_technical_bin_idx:35\n"
+                                      "#LOWER_LEVEL_IBF_13 fullest_technical_bin_idx:7\n"
+                                      "#USER_BIN_IDX	TECHNICAL_BIN_INDICES	NUMBER_OF_TECHNICAL_BINS\n"
+                                      "0	5;0	1;2\n"
+                                      "1	10;0	1;2\n"
+                                      "2	3;0	1;2\n"
+                                      "3	13;0	1;2\n"
+                                      "4	13;2	1;2\n"
+                                      "5	7;0	1;2\n"
+                                      "6	10;2	1;2\n"
+                                      "7	10;4	1;2\n"
+                                      "8	3;2	1;2\n"
+                                      "9	3;4	1;2\n"
+                                      "10	4;0	1;2\n"
+                                      "11	7;2	1;2\n"
+                                      "12	2;0	1;2\n"
+                                      "13	2;2	1;2\n"
+                                      "14	2;4	1;2\n"
+                                      "15	4;2	1;2\n"
+                                      "16	13;4	1;3\n"
+                                      "17	7;4	1;2\n"
+                                      "18	5;2	1;3\n"
+                                      "19	10;6	1;2\n"
+                                      "20	8;0	1;12\n"
+                                      "21	8;12	1;12\n"
+                                      "22	7;6	1;9\n"
+                                      "23	7;15	1;9\n"
+                                      "24	10;8	1;9\n"
+                                      "25	10;17	1;9\n"
+                                      "26	4;4	1;11\n"
+                                      "27	4;15	1;11\n"
+                                      "28	1;0	1;12\n"
+                                      "29	1;12	1;12\n"
+                                      "30	2;6	1;11\n"
+                                      "31	2;17	1;11\n"
+                                      "32	13;7	1;13\n"
+                                      "33	7;24	1;9\n"
+                                      "34	5;5	1;14\n"
+                                      "35	10;26	1;9\n"
+                                      "36	3;6	1;13\n"
+                                      "37	22	1\n"
+                                      "38	21	1\n"
+                                      "39	20	1\n"
+                                      "40	15	1\n"
+                                      "41	18	1\n"
+                                      "42	19	1\n"
+                                      "43	12	1\n"
+                                      "44	17	1\n"
+                                      "45	16	1\n"
+                                      "46	9	1\n"
+                                      "47	14	1\n"
+                                      "48	11	1\n"
+                                      "49	8;24	1;40\n"
+                                      "50	13;20	1;44\n"
+                                      "51	7;33	1;31\n"
+                                      "52	5;19	1;45\n"
+                                      "53	10;35	1;29\n"
+                                      "54	3;19	1;45\n"
+                                      "55	4;26	1;38\n"
+                                      "56	6	1\n"
+                                      "57	1;24	1;40\n"
+                                      "58	0	1\n"
+                                      "59	2;28	1;36\n"
+                                      "60	63	1\n"
+                                      "61	62	1\n"
+                                      "62	60	1\n"
+                                      "63	61	1\n"
+                                      "64	59	1\n"
+                                      "65	58	1\n"
+                                      "66	57	1\n"
+                                      "67	55	1\n"
+                                      "68	56	1\n"
+                                      "69	54	1\n"
+                                      "70	53	1\n"
+                                      "71	52	1\n"
+                                      "72	51	1\n"
+                                      "73	49	1\n"
+                                      "74	48	1\n"
+                                      "75	50	1\n"
+                                      "76	45	1\n"
+                                      "77	47	1\n"
+                                      "78	46	1\n"
+                                      "79	44	1\n"
+                                      "80	43	1\n"
+                                      "81	42	1\n"
+                                      "82	41	1\n"
+                                      "83	40	1\n"
+                                      "84	39	1\n"
+                                      "85	37	1\n"
+                                      "86	38	1\n"
+                                      "87	36	1\n"
+                                      "88	35	1\n"
+                                      "89	34	1\n"
+                                      "90	29	2\n"
+                                      "91	31	2\n"
+                                      "92	27	2\n"
+                                      "93	23	2\n"
+                                      "94	33	1\n"
+                                      "95	25	2\n"
+#else
+                                      "#TOP_LEVEL_IBF fullest_technical_bin_idx:23\n"
+                                      "#LOWER_LEVEL_IBF_1 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_2 fullest_technical_bin_idx:28\n"
+                                      "#LOWER_LEVEL_IBF_3 fullest_technical_bin_idx:6\n"
+                                      "#LOWER_LEVEL_IBF_4 fullest_technical_bin_idx:15\n"
+                                      "#LOWER_LEVEL_IBF_5 fullest_technical_bin_idx:0\n"
+                                      "#LOWER_LEVEL_IBF_6 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_8 fullest_technical_bin_idx:24\n"
+                                      "#LOWER_LEVEL_IBF_9 fullest_technical_bin_idx:35\n"
+                                      "#LOWER_LEVEL_IBF_12 fullest_technical_bin_idx:7\n"
+                                      "#USER_BIN_IDX	TECHNICAL_BIN_INDICES	NUMBER_OF_TECHNICAL_BINS\n"
+                                      "0	5;0	1;2\n"
+                                      "1	9;0	1;2\n"
+                                      "2	3;0	1;2\n"
+                                      "3	12;0	1;2\n"
+                                      "4	12;2	1;2\n"
+                                      "5	6;0	1;2\n"
+                                      "6	9;2	1;2\n"
+                                      "7	9;4	1;2\n"
+                                      "8	3;2	1;2\n"
+                                      "9	3;4	1;2\n"
+                                      "10	4;0	1;2\n"
+                                      "11	6;2	1;2\n"
+                                      "12	2;0	1;2\n"
+                                      "13	2;2	1;2\n"
+                                      "14	2;4	1;2\n"
+                                      "15	4;2	1;2\n"
+                                      "16	12;4	1;3\n"
+                                      "17	6;4	1;2\n"
+                                      "18	5;2	1;3\n"
+                                      "19	9;6	1;2\n"
+                                      "20	8;0	1;12\n"
+                                      "21	8;12	1;12\n"
+                                      "22	6;6	1;9\n"
+                                      "23	6;15	1;9\n"
+                                      "24	9;8	1;9\n"
+                                      "25	9;17	1;9\n"
+                                      "26	4;4	1;11\n"
+                                      "27	4;15	1;11\n"
+                                      "28	1;0	1;12\n"
+                                      "29	1;12	1;12\n"
+                                      "30	2;6	1;11\n"
+                                      "31	2;17	1;11\n"
+                                      "32	12;7	1;13\n"
+                                      "33	6;24	1;9\n"
+                                      "34	5;5	1;14\n"
+                                      "35	9;26	1;9\n"
+                                      "36	3;6	1;13\n"
+                                      "37	22	1\n"
+                                      "38	21	1\n"
+                                      "39	20	1\n"
+                                      "40	15	1\n"
+                                      "41	18	1\n"
+                                      "42	19	1\n"
+                                      "43	13	1\n"
+                                      "44	17	1\n"
+                                      "45	14	1\n"
+                                      "46	10	1\n"
+                                      "47	16	1\n"
+                                      "48	11	1\n"
+                                      "49	8;24	1;40\n"
+                                      "50	12;20	1;44\n"
+                                      "51	6;33	1;31\n"
+                                      "52	5;19	1;45\n"
+                                      "53	9;35	1;29\n"
+                                      "54	3;19	1;45\n"
+                                      "55	4;26	1;38\n"
+                                      "56	7	1\n"
+                                      "57	1;24	1;40\n"
+                                      "58	0	1\n"
+                                      "59	2;28	1;36\n"
+                                      "60	63	1\n"
+                                      "61	62	1\n"
+                                      "62	60	1\n"
+                                      "63	61	1\n"
+                                      "64	59	1\n"
+                                      "65	58	1\n"
+                                      "66	57	1\n"
+                                      "67	55	1\n"
+                                      "68	56	1\n"
+                                      "69	54	1\n"
+                                      "70	52	1\n"
+                                      "71	53	1\n"
+                                      "72	51	1\n"
+                                      "73	49	1\n"
+                                      "74	48	1\n"
+                                      "75	50	1\n"
+                                      "76	45	1\n"
+                                      "77	47	1\n"
+                                      "78	46	1\n"
+                                      "79	44	1\n"
+                                      "80	39	1\n"
+                                      "81	40	1\n"
+                                      "82	41	1\n"
+                                      "83	42	1\n"
+                                      "84	38	1\n"
+                                      "85	37	1\n"
+                                      "86	43	1\n"
+                                      "87	36	1\n"
+                                      "88	35	1\n"
+                                      "89	34	1\n"
+                                      "90	29	2\n"
+                                      "91	31	2\n"
+                                      "92	27	2\n"
+                                      "93	23	2\n"
+                                      "94	33	1\n"
+                                      "95	25	2\n"
+#endif
+    };
+    std::string const actual_file{string_from_file(layout_file)};
+
+    EXPECT_EQ(actual_file, expected_file) << actual_file << std::endl;
+}
diff --git a/test/api/layout/hibf_statistics_test.cpp b/test/api/layout/hibf_statistics_test.cpp
index 718b43dc..054fbcb2 100644
--- a/test/api/layout/hibf_statistics_test.cpp
+++ b/test/api/layout/hibf_statistics_test.cpp
@@ -133,11 +133,12 @@ TEST(execute_test, chopper_layout_statistics)
                                                   .disable_estimate_union = true /* also disable rearrangement */}};
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
     testing::internal::CaptureStdout();
     testing::internal::CaptureStderr();
-    chopper::layout::execute(config, many_filenames, sketches);
+    chopper::layout::execute(config, many_filenames, sketches, minHash_sketches);
     std::string layout_result_stdout = testing::internal::GetCapturedStdout();
     std::string layout_result_stderr = testing::internal::GetCapturedStderr();
 
@@ -190,9 +191,10 @@ TEST(execute_test, chopper_layout_statistics_determine_best_bins)
                                                   .disable_estimate_union = true /* also disable rearrangement */}};
 
     std::vector<seqan::hibf::sketch::hyperloglog> sketches;
-    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches);
+    std::vector<seqan::hibf::sketch::minhashes> minHash_sketches{};
+    seqan::hibf::sketch::compute_sketches(config.hibf_config, sketches, minHash_sketches);
 
-    chopper::layout::execute(config, filenames, sketches);
+    chopper::layout::execute(config, filenames, sketches, minHash_sketches);
 
     std::string expected_cout =
         R"expected_cout(## ### Parameters ###
diff --git a/test/api/lsh_test.cpp b/test/api/lsh_test.cpp
new file mode 100644
index 00000000..1ebfd552
--- /dev/null
+++ b/test/api/lsh_test.cpp
@@ -0,0 +1,159 @@
+#include <gtest/gtest.h> // for Test, TestInfo, EXPECT_EQ, Message, TEST, TestPartResult
+
+#include <cstddef>     // for size_t
+#include <sstream>     // for operator<<, char_traits, basic_ostream, basic_stringstream, strings...
+#include <string>      // for allocator, string
+#include <string_view> // for operator<<
+#include <vector>      // for vector
+
+#include <chopper/lsh.hpp>
+
+TEST(Cluster_test, ctor_from_id)
+{
+    size_t user_bin_idx{5};
+    chopper::Cluster const cluster{user_bin_idx};
+
+    EXPECT_EQ(cluster.id(), user_bin_idx);
+    EXPECT_FALSE(cluster.empty());
+    EXPECT_EQ(cluster.size(), 1u);
+    ASSERT_EQ(cluster.contained_user_bins().size(), 1u);
+    EXPECT_EQ(cluster.contained_user_bins()[0], user_bin_idx);
+    EXPECT_TRUE(cluster.is_valid(user_bin_idx));
+}
+
+TEST(Cluster_test, move_to)
+{
+    size_t user_bin_idx1{5};
+    size_t user_bin_idx2{7};
+    chopper::Cluster cluster1{user_bin_idx1};
+    chopper::Cluster cluster2{user_bin_idx2};
+
+    EXPECT_TRUE(cluster1.is_valid(user_bin_idx1));
+    EXPECT_TRUE(cluster2.is_valid(user_bin_idx2));
+
+    cluster2.move_to(cluster1);
+
+    // cluster1 now contains user bins 5 and 7
+    EXPECT_EQ(cluster1.size(), 2u);
+    ASSERT_EQ(cluster1.contained_user_bins().size(), 2u);
+    EXPECT_EQ(cluster1.contained_user_bins()[0], user_bin_idx1);
+    EXPECT_EQ(cluster1.contained_user_bins()[1], user_bin_idx2);
+
+    // cluster 2 is empty
+    EXPECT_TRUE(cluster2.has_been_moved());
+    EXPECT_TRUE(cluster2.empty());
+    EXPECT_EQ(cluster2.size(), 0u);
+    EXPECT_EQ(cluster2.contained_user_bins().size(), 0u);
+    EXPECT_EQ(cluster2.moved_to_cluster_id(), cluster1.id());
+
+    // both should still be valid
+    EXPECT_TRUE(cluster1.is_valid(user_bin_idx1));
+    EXPECT_TRUE(cluster2.is_valid(user_bin_idx2));
+}
+
+TEST(Multicluster_test, ctor_from_cluster)
+{
+    chopper::Cluster const cluster1{5};
+    chopper::MultiCluster const multi_cluster1{cluster1};
+
+    EXPECT_EQ(multi_cluster1.id(), cluster1.id());
+    EXPECT_FALSE(multi_cluster1.empty());
+    EXPECT_EQ(multi_cluster1.size(), 1u);
+    ASSERT_EQ(multi_cluster1.contained_user_bins().size(), 1u);
+    ASSERT_EQ(multi_cluster1.contained_user_bins()[0].size(), 1u);
+    EXPECT_EQ(multi_cluster1.contained_user_bins()[0][0], 5u);
+    EXPECT_TRUE(multi_cluster1.is_valid(cluster1.id()));
+}
+
+TEST(Multicluster_test, ctor_from_moved_cluster)
+{
+    chopper::Cluster cluster1{5};
+    chopper::Cluster cluster2{7};
+    cluster2.move_to(cluster1);
+
+    chopper::MultiCluster const multi_cluster2{cluster2};
+
+    EXPECT_EQ(multi_cluster2.id(), cluster2.id());
+    EXPECT_TRUE(multi_cluster2.empty());
+    EXPECT_TRUE(multi_cluster2.has_been_moved());
+    EXPECT_EQ(multi_cluster2.moved_to_cluster_id(), cluster2.moved_to_cluster_id());
+    EXPECT_EQ(multi_cluster2.size(), 0u);
+    ASSERT_EQ(multi_cluster2.contained_user_bins().size(), 0u);
+    EXPECT_TRUE(multi_cluster2.is_valid(cluster2.id()));
+}
+
+TEST(Multicluster_test, move_to)
+{
+    chopper::Cluster cluster1{5};
+    chopper::Cluster cluster2{7};
+    cluster2.move_to(cluster1);
+    ASSERT_EQ(cluster1.size(), 2u);
+    chopper::Cluster const cluster3{13};
+
+    chopper::MultiCluster multi_cluster1{cluster1};
+    EXPECT_TRUE(multi_cluster1.is_valid(cluster1.id()));
+    EXPECT_EQ(multi_cluster1.size(), 1u);
+    EXPECT_EQ(multi_cluster1.contained_user_bins().size(), 1u);
+    EXPECT_EQ(multi_cluster1.contained_user_bins()[0].size(), 2u);
+
+    chopper::MultiCluster multi_cluster3{cluster3};
+    EXPECT_TRUE(multi_cluster3.is_valid(cluster3.id()));
+    EXPECT_EQ(multi_cluster3.size(), 1u);
+
+    multi_cluster1.move_to(multi_cluster3);
+
+    EXPECT_TRUE(multi_cluster1.is_valid(cluster1.id()));
+    EXPECT_TRUE(multi_cluster3.is_valid(cluster3.id()));
+
+    // multi_cluster1 has been moved and is empty now
+    EXPECT_TRUE(multi_cluster1.empty());
+    EXPECT_EQ(multi_cluster1.size(), 0u);
+    EXPECT_TRUE(multi_cluster1.has_been_moved());
+    EXPECT_EQ(multi_cluster1.moved_to_cluster_id(), multi_cluster3.id());
+
+    // multi_cluster3 contains 2 clusters now, {13} and {5, 7}
+    EXPECT_FALSE(multi_cluster3.empty());
+    EXPECT_EQ(multi_cluster3.size(), 2u); // two clusters
+    ASSERT_EQ(multi_cluster3.contained_user_bins().size(), 2u);
+    ASSERT_EQ(multi_cluster3.contained_user_bins()[0].size(), 1u);
+    ASSERT_EQ(multi_cluster3.contained_user_bins()[1].size(), 2u);
+    ASSERT_EQ(multi_cluster3.contained_user_bins()[0][0], cluster3.id());
+    ASSERT_EQ(multi_cluster3.contained_user_bins()[1][0], cluster1.id());
+    ASSERT_EQ(multi_cluster3.contained_user_bins()[1][1], cluster2.id());
+}
+
+TEST(LSH_find_representative_cluster_test, cluster_one_move)
+{
+    std::vector<chopper::Cluster> clusters{chopper::Cluster{0}, chopper::Cluster{1}};
+    clusters[1].move_to(clusters[0]);
+
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(clusters, clusters[1].id()), clusters[0].id());
+}
+
+TEST(LSH_find_representative_cluster_test, multi_cluster_one_move)
+{
+    std::vector<chopper::MultiCluster> mclusters{{chopper::Cluster{0}}, {chopper::Cluster{1}}};
+    mclusters[1].move_to(mclusters[0]);
+
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(mclusters, mclusters[1].id()), mclusters[0].id());
+}
+
+TEST(LSH_find_representative_cluster_test, cluster_two_moves)
+{
+    std::vector<chopper::Cluster> clusters{chopper::Cluster{0}, chopper::Cluster{1}, chopper::Cluster{2}};
+    clusters[2].move_to(clusters[1]);
+    clusters[1].move_to(clusters[0]);
+
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(clusters, clusters[1].id()), clusters[0].id());
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(clusters, clusters[2].id()), clusters[0].id());
+}
+
+TEST(LSH_find_representative_cluster_test, multi_cluster_two_moves)
+{
+    std::vector<chopper::MultiCluster> mclusters{{chopper::Cluster{0}}, {chopper::Cluster{1}}, {chopper::Cluster{2}}};
+    mclusters[2].move_to(mclusters[1]);
+    mclusters[1].move_to(mclusters[0]);
+
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(mclusters, mclusters[1].id()), mclusters[0].id());
+    EXPECT_EQ(chopper::LSH_find_representative_cluster(mclusters, mclusters[2].id()), mclusters[0].id());
+}