From 80feb96e2a6f9b99f4949df65a3e4cc9258d8a51 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 16 Dec 2025 03:27:03 +0000
Subject: [PATCH 1/4] Initial plan


From 1a9e4e1647bf2ee3094e23fec4d369d81e23e966 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 16 Dec 2025 03:36:04 +0000
Subject: [PATCH 2/4] Add zst format support to core file handling

Co-authored-by: wwood <15348+wwood@users.noreply.github.com>
---
 pixi.toml                              |  1 +
 singlem/biolib_lite/prodigal_biolib.py |  6 +++++-
 singlem/biolib_lite/seq_io.py          |  8 +++++++-
 singlem/main.py                        |  7 ++++++-
 singlem/orf_length_checker.py          | 11 +++++++++--
 singlem/otu_table_collection.py        | 12 ++++++++++++
 singlem/summariser.py                  | 10 ++++++++--
 singlem/utils.py                       |  6 +++---
 8 files changed, 51 insertions(+), 10 deletions(-)

diff --git a/pixi.toml b/pixi.toml
index bff158c4..67f17e66 100644
--- a/pixi.toml
+++ b/pixi.toml
@@ -46,6 +46,7 @@ pyarrow = "*"
 galah = ">=0.4.0"
 sqlparse = "*"  # Required indirectly (e.g. taxtastic)
 zenodo_backpack = ">=0.4.0"
+zstandard = "*"
 # Optional (commented out)
 # python-annoy = "*"
 # nmslib = "*"
diff --git a/singlem/biolib_lite/prodigal_biolib.py b/singlem/biolib_lite/prodigal_biolib.py
index cd9605ec..bea87e69 100644
--- a/singlem/biolib_lite/prodigal_biolib.py
+++ b/singlem/biolib_lite/prodigal_biolib.py
@@ -114,13 +114,17 @@ def _producer(self, genome_file):
                     else:
                         proc_str = 'single'  # estimate parameters from data
 
-                    # If this is a gzipped genome, re-write the uncompressed genome
+                    # If this is a gzipped or zst compressed genome, re-write the uncompressed genome
                     # file to disk
                     prodigal_input = genome_file
                     if genome_file.endswith('.gz'):
                         prodigal_input = os.path.join(
                             tmp_dir, os.path.basename(genome_file[0:-3]) + '.fna')
                         write_fasta(seqs, prodigal_input)
+                    elif genome_file.endswith('.zst'):
+                        prodigal_input = os.path.join(
+                            tmp_dir, os.path.basename(genome_file[0:-4]) + '.fna')
+                        write_fasta(seqs, prodigal_input)
 
                     # there may be ^M character in the input file,
                     # the following code is similar to dos2unix command to remove
diff --git a/singlem/biolib_lite/seq_io.py b/singlem/biolib_lite/seq_io.py
index 1212d60b..7fe1a612 100644
--- a/singlem/biolib_lite/seq_io.py
+++ b/singlem/biolib_lite/seq_io.py
@@ -20,6 +20,7 @@
 import os
 import sys
 import traceback
+import zstandard
 
 from .exceptions import InputFileError
 
@@ -55,6 +56,8 @@ def read_fasta(fasta_file, keep_annotation=False):
 
         if fasta_file.endswith('.gz'):
             file_f, file_mode = gzip.open, 'rt'
+        elif fasta_file.endswith('.zst'):
+            file_f, file_mode = zstandard.open, 'rt'
         else:
             file_f, file_mode = open, 'r'
 
@@ -126,6 +129,9 @@ def read_fasta_seq(fasta_file, keep_annotation=False):
         if fasta_file.endswith('.gz'):
             open_file = gzip.open
             mode = 'rb'
+        elif fasta_file.endswith('.zst'):
+            open_file = zstandard.open
+            mode = 'rb'
 
         seq_id = None
         annotation = None
@@ -201,7 +207,7 @@ def read_seq(seq_file, keep_annotation=False):
         and the annotation if keep_annotation is True.
     """
 
-    if seq_file.endswith(('.fq.gz', '.fastq.gz', '.fq', '.fq.gz')):
+    if seq_file.endswith(('.fq.gz', '.fastq.gz', '.fq', '.fq.gz', '.fq.zst', '.fastq.zst')):
         raise Exception("Cannot read FASTQ files.")
         # for rtn in read_fastq_seq(seq_file):
         #     yield rtn
diff --git a/singlem/main.py b/singlem/main.py
index 63f73d5d..ea100198 100755
--- a/singlem/main.py
+++ b/singlem/main.py
@@ -14,6 +14,7 @@
 import gzip
 import tempfile
 import json
+import zstandard
 
 from bird_tool_utils import *
 from bird_tool_utils.people import *
@@ -951,7 +952,11 @@ def add_prokaryotic_fraction_parser(name, description, deprecated=False):
                 with open(args.input_gzip_archive_otu_table_list) as f:
                     for arc in f.readlines():
                         try:
-                            otus.add_archive_otu_table(gzip.open(arc.strip()))
+                            # Entries ending in .zst are opened with zstandard, all others with gzip.
+                            # NOTE(review): the handle is not closed in this loop - confirm whether it should be.
+                            arc_path = arc.strip()
+                            opener = zstandard.open if arc_path.endswith('.zst') else gzip.open
+                            otus.add_archive_otu_table(opener(arc_path))
                         except json.decoder.JSONDecodeError:
                             logging.warning("Failed to parse JSON from archive OTU table {}, skipping".format(arc))
             otus.set_target_taxonomy_by_string(args.taxonomy)
diff --git a/singlem/orf_length_checker.py b/singlem/orf_length_checker.py
index d387e69f..e754aa7b 100644
--- a/singlem/orf_length_checker.py
+++ b/singlem/orf_length_checker.py
@@ -10,8 +10,15 @@ def check_sequence_file_contains_an_orf(path, min_orf_length):
         streaming. Only checks the first 1000 lines of the sequence file.'''
 
         # Cannot use extern here because the SIGPIPE signals generated
-        result = subprocess.check_output(['bash','-c',"cat '%s' |zcat --stdout -f  |head -n1000 |orfm -m %i |head -n2" %(
-            path, min_orf_length
+        # Choose the decompressor by extension; shlex.quote keeps odd paths (quotes etc.) shell-safe
+        import shlex
+        if path.endswith('.zst'):
+            decompress_cmd = "zstdcat %s" % shlex.quote(path)
+        else:
+            # zcat with -f handles both plain and gzipped files
+            decompress_cmd = "zcat --stdout -f %s" % shlex.quote(path)
+        result = subprocess.check_output(['bash','-c',"%s |head -n1000 |orfm -m %i |head -n2" %(
+            decompress_cmd, min_orf_length
         )])
         if len(result) == 0:
             return False
diff --git a/singlem/otu_table_collection.py b/singlem/otu_table_collection.py
index 43f5f323..5ed3ec73 100644
--- a/singlem/otu_table_collection.py
+++ b/singlem/otu_table_collection.py
@@ -3,6 +3,7 @@
 from collections import OrderedDict
 import gzip
 import json
+import zstandard
 
 from .archive_otu_table import ArchiveOtuTable
 from .otu_table import OtuTable
@@ -193,6 +194,7 @@ def __init__(self):
         self._otu_table_file_paths = []
         self._archive_table_file_paths = []
         self._gzip_archive_table_file_paths = []
+        self._zst_archive_table_file_paths = []
         self._archive_table_objects = []
         self.min_archive_otu_table_version = None
 
@@ -222,6 +224,9 @@ def add_archive_otu_table_file(self, file_path):
     def add_gzip_archive_otu_table_file(self, file_path):
         self._gzip_archive_table_file_paths.append(file_path)
 
+    def add_zst_archive_otu_table_file(self, file_path):
+        self._zst_archive_table_file_paths.append(file_path)
+
     def add_archive_otu_table_object(self, archive_table):
         '''Not technically streaming, but easier to put this here for pipe
         instead of implementing each_sample_otus() for non-streaming OTU
@@ -253,6 +258,13 @@ def __iter__(self):
                         yield otu
                 except json.decoder.JSONDecodeError:
                     logging.error(f"JSON parsing error in {file_path}, skipping this one")
+        for file_path in self._zst_archive_table_file_paths:
+            with zstandard.open(file_path) as f:
+                try:
+                    for otu in ArchiveOtuTable.read(f, min_version=self.min_archive_otu_table_version):
+                        yield otu
+                except json.decoder.JSONDecodeError:
+                    logging.error(f"JSON parsing error in {file_path}, skipping this one")
         for archive_table in self._archive_table_objects:
             for otu in archive_table:
                 yield otu
diff --git a/singlem/summariser.py b/singlem/summariser.py
index fff7de01..20cddfdf 100644
--- a/singlem/summariser.py
+++ b/singlem/summariser.py
@@ -8,6 +8,7 @@
 import pandas as pd
 import polars as pl
 import gzip
+import zstandard
 
 from .otu_table import OtuTable
 from .rarefier import Rarefier
@@ -389,8 +390,13 @@ def read_archive_table(df, f, prev_ar):
                 logging.debug(f"Found {len(lines)} lines in achive otu table list.")
                 for a in lines:
                     logging.debug("Reading gzip archive table {} ..".format(a))
-                    with gzip.open(a.strip()) as g:
-                        overall_df, ar = read_archive_table(overall_df, g, ar)
+                    a_path = a.strip()
+                    # Entries ending in .zst are read with zstandard, all others with gzip.
+                    opener = gzip.open
+                    if a_path.endswith('.zst'):
+                        opener = zstandard.open
+                    with opener(a_path) as g:
+                        overall_df, ar = read_archive_table(overall_df, g, ar)
         df = overall_df
 
         # Remove suffixes
diff --git a/singlem/utils.py b/singlem/utils.py
index 1e6acd95..f237e97b 100644
--- a/singlem/utils.py
+++ b/singlem/utils.py
@@ -34,10 +34,10 @@ class FastaNameToSampleName:
     @staticmethod
     def fasta_to_name(query_sequences_file):
         sample_name = os.path.basename(query_sequences_file)
-        # Put .gz first so it is stripped off first.
-        for extension in ('.gz','.fna','.fq','.fastq','.fasta','.fa'):
+        # Put .gz and .zst first so they are stripped off first.
+        for extension in ('.gz','.zst','.fna','.fq','.fastq','.fasta','.fa'):
             if sample_name.endswith(extension):
                 sample_name = sample_name[0:(len(sample_name)-len(extension))]
-                if extension != '.gz':
+                if extension not in ('.gz', '.zst'):
                     break
         return sample_name

From c0f6db921778c292d795843e48d02420f0835a8d Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 16 Dec 2025 03:38:16 +0000
Subject: [PATCH 3/4] Add tests for zst file handling

Co-authored-by: wwood <15348+wwood@users.noreply.github.com>
---
 test/data/small.otu_table.json.zst | Bin 0 -> 1624 bytes
 test/test_orf_length_checker.py    |   9 +++++++++
 test/test_summariser.py            |   8 ++++++++
 3 files changed, 17 insertions(+)
 create mode 100644 test/data/small.otu_table.json.zst

diff --git a/test/data/small.otu_table.json.zst b/test/data/small.otu_table.json.zst
new file mode 100644
index 0000000000000000000000000000000000000000..dad4236ebd22f60e858866fe750e2fc4360bf459
GIT binary patch
literal 1624
zcmV-e2B-NbwJ-eySY0vzN{%cRDIjf=bhEq(JaXXU1R0ycveov19nPw++Ufw}QzC>E
znMGEn#R38X#{)zGH~}&N=v1*c^etQb6B9~q<mWpkv(_^mJ#8=Uc&x`PLX;GL4SBJ4
zQ~!%$FP^^86m~t7r(aIj$>%(qT+Q>c6jPAbWkLzxi<-2?nQ=?uPpSS)y;3U5t5lw}
z+?FlxzLm#3mo1x2lMSgkXseyeo-)UveroC2R7x36W;3&q8m=ns*`>{4`5|PV92qO0
z%9NDq-XN0-pWd3jHkP=*t*Hq$a^z`|&yXH&nI%4=m0}v1kzynNmt?cF*EfZWlDG4u
zr{wf__zx)>Mc+zv%yeIxPQ4Y0h2pHAqKaJ4n?6qH=@B~<t(<l~KHtxElpCRbqSaC`
zVNqsdMPi{Sb6M}jpf+U=%UY?$I)}Gv6Qyi}deC!oX5ZGyO=;(!=$xWKB0&Lh%xwb@
z4F-k=8W9c7(C|QEVHJtRB@z@34hj$t4uOJ#$qO>W=4cl}t%=Gs9yHv;M%}~)wLTB?
zC1ja0)6wF|(x*w)EX79A*{S9-vb{V8-FY(HXU?X>){KvC%@7+Qa}~p->Ju{DH<sCK
z&VHetgM3o;kGU;%#d=Ia&9moI#~-W%X(2QieBnWZ0)!&5aG)q-DBb)i_f`kh8lGGG
zyeyjV{7rABV?5<~zu<aiY9{09Ib)9Z`JA2|W7NsX3ysge7CPwGx^I!$!Q@*u>)hGW
zj;5qD@25YtP5Ie6E1wL)cBGr$%W2BRcKlyPU(CCjGAkZ*+E(n<&Um@$Vw0K@lkr4|
z#d+&JJua&ml<Yc9do817P6+Y&GGlzo(c|Y*Qm@};*;zZNWs-UPH%+d|buiUno+H+7
zW;+NSF2q@ysoj>DCC&XCuQdF=_jQ{(@iFZ=g(!I`2NA-`HkRrWpY-{iSg;X#)`E^(
zN?bM7yk<%t^~$p4>Zbi(C%u~^;;b&mbuXnxCl}ASiXqO@X$rr=Tuzh+`?FAQw!NLR
z>g#?vPpVbvl(j7A==sw6<Gq6GjqX%wxkCxPxno`k(IZTlcBPyXZaTCl8PA|Dg_G0g
zU2jTc(7{K{CGnEi{@If5?F-qtJlicPKi4N?{*L0F9!;lB+i}}cW1o!4s`@15N{#yl
z87}FV#(0-Gf0C!Iv%7NJeHIyR&ob?TshGWVf8O&${mxj7@=Qt7=L(fRrh3e=?x`8N
zHfl|-KP&9*WJcyZvfOUN-lof{(XcZeKP+BZ@)bF)VLf+MrR&n+we@^UbJT2mJ#nEf
zTF&a2XFJG;Y?_XxRQj0>D#}MyUI7(2aNt0ZAO~_N5(`C=AdDkXBo+!JIT*>oIF2Ma
zj^j9z1WA$uITVS7;z*DrNsa?yBng5fjN@Q14g^UUNOBwrgK;29axe}?k|4=}BnRVQ
z97&Qe62@^LiUx^=LoDnk77h}NIt@%Uq1dG*Xr7(~v@A==D`}Zv63-IM8J+}`W|x3v
zmVgo%V1RL89$*6`9AgPs&JqOxFkpcK6b%;EA;CalVL>4!7(jSLgrvampg=&Q0U^O)
z6&^eytpJTiCm2LHI3%D*EKKVdv_KP!l2|B^1UU{yf*?ndAV`uRjDvw33`TMs2m?8e
zBSDaZfg}uuBC${`EGz?rBC${$umItqVTM4XLBS&&9w0OpmcaqSffXJWm<6VQqCtcR
z2Z%;Pga8^MaA<I#F#$Kc0pYC>;ou7?LW47GL!;LpCSY)AaFAdl{J}mr1P%!zLZN{M
zLxhIZ;1M411qcia(clky!4>`#T~%31k4n}in{)X$Meg_2OO1?svQc?6btO8Sj{N?#
zqSNWRyRi*=bEzz->{+tOR1DU?`8l%ls91k0FOG*z*Iidhs8JAPkTQlLLW~hfNP>~`
z0l)$op`qFa5<oPA2F*m1|Cv~M76&>Ia7jIS-S?3OAs_emv<3Q`q6)8ZAzmsGunNXE
zu=bI@7Xxktpu$QBnOnS9`66zddj0|qEO%zEC3M)0X*d+S^c8PiYCUMtTZRZBf!vfY
z+?4)3?l#Ctlk?@m-jf*!Z~~FC2_w$1wHTG350rooIDiBwfGf=q0Y4B{DD2E{^#gbS
W1V{h{SOD^t_ESOfgYK)@KZ~nx@AB^e

literal 0
HcmV?d00001

diff --git a/test/test_orf_length_checker.py b/test/test_orf_length_checker.py
index d9311a79..30459961 100644
--- a/test/test_orf_length_checker.py
+++ b/test/test_orf_length_checker.py
@@ -87,5 +87,14 @@ def test_gzip_good(self):
                 f.name+'.gz', 72
             ))
 
+    def test_zst_good(self):
+        with tempfile.NamedTemporaryFile() as f:
+            f.write(('>seq\n'+'A'*100+'\n').encode())
+            f.flush()
+            extern.run("zstd -k {}".format(f.name))
+            self.assertTrue(OrfLengthChecker.check_sequence_file_contains_an_orf(
+                f.name+'.zst', 72
+            ))
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/test_summariser.py b/test/test_summariser.py
index 7c463283..0d3a9918 100644
--- a/test/test_summariser.py
+++ b/test/test_summariser.py
@@ -63,6 +63,14 @@ def test_archive_to_otu_table_conversion(self):
         self.assertEqual('gene\tsample\tsequence\tnum_hits\tcoverage\ttaxonomy\n\
 S1.5.ribosomal_protein_L11_rplK\tsmall\tCCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t4\t9.76\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n', obs)
 
+    def test_zst_archive_to_otu_table_conversion(self):
+        cmd = "{} summarise --input-gzip-archive-otu-table-list <(ls {}/small.otu_table.json.zst) --output-otu-table /dev/stdout".format(
+            path_to_script,
+            path_to_data)
+        obs = extern.run(cmd)
+        self.assertEqual('gene\tsample\tsequence\tnum_hits\tcoverage\ttaxonomy\n\
+S1.5.ribosomal_protein_L11_rplK\tsmall\tCCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG\t4\t9.76\tRoot; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales\n', obs)
+
     def test_gzip_archive_list_to_otu_table_conversion(self):
 
         archive = '{"fields": ["gene", "sample", "sequence", "num_hits", "coverage", "taxonomy", "read_names", "nucleotides_aligned", "taxonomy_by_known?"], "singlem_package_sha256s": ["2b2afe0114de20451fccfe74360756376dc83d001d890e84e322ab0833eca6ba", "7f406a73d8bb176994055cb966ff350e208986d12c8215722686c17c26e548c7", "735b44ae547c133163cb7d40f417292c35423864d00c95e7f1b32091b27d46c5", "8fc6dcce2766cc01defb3b5c689a1ed8ce9d59b725c67e58c2044dafaae908b3", "172df49937742b8411d41d217500d862567374401eaf393b25107b22ac630202", "4cb1bf226bf28d8198ed5c29e8a76df411d96a6c3ce1256af16887b9a184b0a6", "d473d3ae677e6e46202461ccdedb2aef23c0a10a3412422586b37e397ca37294", "431a2860bb890cd1c7193c565cbf0cc227850cba36fb17fe94df686e74ee9b11", "faa663527bb9aea63cef03859311f2e7f55fe98590a5ec85c5ba85815a6fd13e", "a0daf111380e6e499ad9c10c3ac413aa9016c7503dd459825100168524bff0d1", "aba631d4735aeb9d2dfbbbfee1c0739bf9e99ad6532a3be04ff627f3e6efdae2", "bba10c1feb0c26bdf46aa3d1dcb992744a699cde5cf02bb2728f8397378b342f", "4d91dd794b25fd256508f0814f6a2d31e20dc85e0aa9ea405031398565276768", "9b23c524a6210af0706eea7252c2d378888029f141b9305c3e88cbac3fd83f88", "50a209417b455a48bc67702d6a3809a172c57f00785d8c705a3322e6e2a71f72"], "version": 1, "alignment_hmm_sha256s": ["dd9b7e283598360b89ec91ff3f5c509361a6108a2eadc44bfb29646b1510f6b7", "b1bb943c3449a78f937db960bfdf6b2bed641388d33fce3cb2d5f69e79946ea6", "de92c90f2c83e380ae3953972fb63fcb8ce868dab87a305f9f1811b84ffb3d39", "453ed4a62608a4aec36117a2dd1a276709ff6b130ecb8d7b1612926bfab25527", "20cc450cf4157ecf1772e0325d4d8ed400b597d888a5cb5044ca69098f935656", "4b0bf5b3d7fd2ca16e54eed59d3a07eab388f70f7078ac096bf415f1c04731d9", "7cbba7ba0ed58d21c7519ba3fcef0abe43378e5c38c985b0d5e0e5219f141d92", "4a3bbe5ac594ef3c7c820e74544828e19eca68bf860d64f928729eb4530fce4e", "06a4bed0a765971b891ca4a4bf5680aeef4a4a249ce0c028798c0e912f0ccfb4", "2678fe218ca860a2d88bdbf76935d8c78a00ab6603a041a432505d754ef08250", "b54ff98aa03ab31af39c737a569b23ee4ed9296c8ea088562bfb3db87c38fe4a", 
"4ae31f14067bf183f38dca20f2aefb580e5ff25848881dd988908b70b67761bb", "d7bb3d544133f38110a329712b3ace7e7d7c989dafa3815d2d5a292b4c575f50", "7639bb919ef54f7baff3ed3a8c924efca97ed375cf4120a6e05d98fd6ef52cbb", "6923b889888ea34fabf463b2c8ad5fe23c94828f1a2631a07601f246f5e87150"], "otus": [["4.11.ribosomal_protein_L10", "minimal", "TTACGTTCACAATTACGTGAAGCTGGTGTTGAGTATAAAGTATACAAAAACACTATGGTA", 2, 4.878048780487805, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales; f__Staphylococcaceae; g__Staphylococcus", ["HWI-ST1243:156:D1K83ACXX:7:1106:18671:79482", "HWI-ST1243:156:D1K83ACXX:7:1105:19152:28331"], [60, 60], false], ["4.12.ribosomal_protein_L11_rplK", "minimal", "CCTGCAGGTAAAGCGAATCCAGCACCACCAGTTGGTCCAGCATTAGGTCAAGCAGGTGTG", 4, 9.75609756097561, "Root; d__Bacteria; p__Firmicutes; c__Bacilli; o__Bacillales", ["HWI-ST1243:156:D1K83ACXX:7:1109:18214:9910", "HWI-ST1243:156:D1K83ACXX:7:1103:21187:63124", "HWI-ST1243:156:D1K83ACXX:7:1108:10813:6928", "HWI-ST1243:156:D1K83ACXX:7:1105:12385:81842"], [60, 60, 60, 60], false]]}'

From a5b3b7fc63382bf498ea0f8274fbe394818ccc72 Mon Sep 17 00:00:00 2001
From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com>
Date: Tue, 16 Dec 2025 03:41:49 +0000
Subject: [PATCH 4/4] Fix code review issues: remove duplicate .fq.gz and use
 correct mode for zst

Co-authored-by: wwood <15348+wwood@users.noreply.github.com>
---
 singlem/biolib_lite/seq_io.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/singlem/biolib_lite/seq_io.py b/singlem/biolib_lite/seq_io.py
index 7fe1a612..abc60ce2 100644
--- a/singlem/biolib_lite/seq_io.py
+++ b/singlem/biolib_lite/seq_io.py
@@ -131,7 +131,7 @@ def read_fasta_seq(fasta_file, keep_annotation=False):
             mode = 'rb'
         elif fasta_file.endswith('.zst'):
             open_file = zstandard.open
-            mode = 'rb'
+            mode = 'rt'
 
         seq_id = None
         annotation = None
@@ -207,7 +207,7 @@ def read_seq(seq_file, keep_annotation=False):
         and the annotation if keep_annotation is True.
     """
 
-    if seq_file.endswith(('.fq.gz', '.fastq.gz', '.fq', '.fq.gz', '.fq.zst', '.fastq.zst')):
+    if seq_file.endswith(('.fq', '.fastq', '.fq.gz', '.fastq.gz', '.fq.zst', '.fastq.zst')):  # plain and compressed FASTQ
         raise Exception("Cannot read FASTQ files.")
         # for rtn in read_fastq_seq(seq_file):
         #     yield rtn