Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 14 additions & 4 deletions singlem/supplement.py
Original file line number Diff line number Diff line change
Expand Up @@ -420,6 +420,10 @@ def run_hmmsearch_on_one_genome(lock, data, matched_transcripts_fna, working_dir
return (total_num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts)


def _run_hmmsearch_on_one_genome_star(args):
    """Call ``run_hmmsearch_on_one_genome`` with ``args`` unpacked positionally.

    Single-argument adapter: it lets a caller that passes one object per task
    (such as a pool's ``imap_unordered``) drive a function that takes several
    positional parameters.

    Args:
        args: Iterable of positional arguments, in the order expected by
            ``run_hmmsearch_on_one_genome``.

    Returns:
        Whatever ``run_hmmsearch_on_one_genome`` returns, unchanged.
    """
    outcome = run_hmmsearch_on_one_genome(*args)
    return outcome


def gather_hmmsearch_results(num_threads, working_directory, old_metapackage, new_genome_transcripts_and_proteins,
hmmsearch_evalue):
# Run hmmsearch using a concatenated set of HMMs from each graftm package in the metapackage
Expand Down Expand Up @@ -454,12 +458,18 @@ def gather_hmmsearch_results(num_threads, working_directory, old_metapackage, ne
# context, otherwise we get deadlock. See
# https://pola-rs.github.io/polars/user-guide/misc/multiprocessing/#example
with get_context('spawn').Pool(num_threads) as pool:
map_result = pool.starmap(
run_hmmsearch_on_one_genome,
[(lock, data, matched_transcripts_fna, working_directory, hmmsearch_evalue, concatenated_hmms) for data in new_genome_transcripts_and_proteins.items()],
map_result = pool.imap_unordered(
_run_hmmsearch_on_one_genome_star,
[(lock, data, matched_transcripts_fna, working_directory, hmmsearch_evalue, concatenated_hmms)
for data in new_genome_transcripts_and_proteins.items()],
chunksize=1)

for (num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) in map_result:
for (num_transcripts, failure_genomes, num_transcriptomes, num_found_transcripts) in tqdm(
map_result,
total=len(new_genome_transcripts_and_proteins),
desc="Running hmmsearch",
unit="genome",
):
total_num_transcripts += num_transcripts
total_failure_genomes += failure_genomes
total_num_transcriptomes += num_transcriptomes
Expand Down