From f4b5a466914e95eb25921ef3b0d9cc873f6c5638 Mon Sep 17 00:00:00 2001 From: Ben J Woodcroft Date: Thu, 8 Jan 2026 15:40:17 +1000 Subject: [PATCH 1/2] Add progress bar for hmmsearch results --- singlem/supplement.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/singlem/supplement.py b/singlem/supplement.py index 4b90b206..ec28a902 100755 --- a/singlem/supplement.py +++ b/singlem/supplement.py @@ -459,7 +459,12 @@ def gather_hmmsearch_results(num_threads, working_directory, old_metapackage, ne [(lock, data, matched_transcripts_fna, working_directory, hmmsearch_evalue, concatenated_hmms) for data in new_genome_transcripts_and_proteins.items()], chunksize=1) - for (num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) in map_result: + for (num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) in tqdm( + map_result, + total=len(new_genome_transcripts_and_proteins), + desc="Running hmmsearch", + unit="genome", + ): total_num_transcripts += num_transcripts total_failure_genomes += failure_genomes total_num_transcriptomes += num_transcriptomes From 527385382ea54805a97138921f1f5dc53bf3c016 Mon Sep 17 00:00:00 2001 From: Ben J Woodcroft Date: Thu, 8 Jan 2026 16:02:28 +1000 Subject: [PATCH 2/2] Update hmmsearch progress tracking --- singlem/supplement.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/singlem/supplement.py b/singlem/supplement.py index ec28a902..20b5a56c 100755 --- a/singlem/supplement.py +++ b/singlem/supplement.py @@ -420,6 +420,10 @@ def run_hmmsearch_on_one_genome(lock, data, matched_transcripts_fna, working_dir return (total_num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) +def _run_hmmsearch_on_one_genome_star(args): + return run_hmmsearch_on_one_genome(*args) + + def gather_hmmsearch_results(num_threads, working_directory, old_metapackage, new_genome_transcripts_and_proteins, hmmsearch_evalue): # Run hmmsearch using a concatenated set of HMMs from each graftm package in the metapackage @@ -454,9 +458,10 @@ def gather_hmmsearch_results(num_threads, working_directory, old_metapackage, ne # context, otherwise we get deadlock. See # https://pola-rs.github.io/polars/user-guide/misc/multiprocessing/#example with get_context('spawn').Pool(num_threads) as pool: - map_result = pool.starmap( - run_hmmsearch_on_one_genome, - [(lock, data, matched_transcripts_fna, working_directory, hmmsearch_evalue, concatenated_hmms) for data in new_genome_transcripts_and_proteins.items()], + map_result = pool.imap_unordered( + _run_hmmsearch_on_one_genome_star, + [(lock, data, matched_transcripts_fna, working_directory, hmmsearch_evalue, concatenated_hmms) + for data in new_genome_transcripts_and_proteins.items()], chunksize=1) for (num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) in tqdm(