bioforensics · standage · Dec 12, 2025 · Dec 10, 2025 · Dec 10, 2025 · Dec 10, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -5,6 +5,12 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
+
+## [Unreleased]
+### Added
+- Support for renaming samples while copying (#6)
+
+
 ## [0.1.3] 2025-12-03
 ### Added
 - Option to add source and destination paths to the copy log with a `--verbose` flag (#5)

diff --git a/ezfastq/api.py b/ezfastq/api.py
@@ -13,15 +13,15 @@
 
 
 def copy(
-    sample_names,
+    sample_name_map,
     seq_path,
     pair_mode=PairMode.Unspecified,
     prefix="",
     workdir=Path("."),
     subdir="seq",
     verbose=False,
 ):
-    copier = FastqCopier.from_dir(sample_names, seq_path, prefix=prefix, pair_mode=pair_mode)
+    copier = FastqCopier.from_dir(sample_name_map, seq_path, prefix=prefix, pair_mode=pair_mode)
     copier.copy_files(workdir / subdir)
     copier.print_copy_log()
     nlogs = len(list((workdir / subdir).glob("copy-log-*.toml")))

diff --git a/ezfastq/cli.py b/ezfastq/cli.py
@@ -8,11 +8,14 @@
 # -------------------------------------------------------------------------------------------------
 
 from .api import copy
+from .namemap import NameMap
 from .pair import PairMode
 from argparse import ArgumentParser
 from importlib.metadata import version
 from pathlib import Path
-from rich_argparse import RichHelpFormatter
+from rich.text import Text
+from rich_argparse import RawDescriptionRichHelpFormatter
+from shutil import get_terminal_size
 
 
 def main(arglist=None):
@@ -35,15 +38,27 @@ def parse_args(arglist=None):
     samples_file = Path(args.samples[0])
     samples_file_exists = samples_file.is_file() or samples_file.is_fifo()
     if len(args.samples) == 1 and samples_file_exists:
-        args.samples = samples_file.read_text().strip().split("\n")
+        args.samples = NameMap.from_file(samples_file)
+    else:
+        args.samples = NameMap.from_arglist(args.samples)
     args.pair_mode = PairMode(args.pair_mode)
     return args
 
 
 def get_parser():
+    epilog = """
+[bold cyan]Examples:[/bold cyan]
+    [dim]ezfastq /path/to/fastqs/ sample1 sample2 sample3
+    ezfastq /path/to/fastqs/ s1:Sample1 s2:Sample2 s3:Sample3
+    ezfastq /path/to/fastqs/ samplenames.txt
+    ezfastq /path/to/fastqs/ samplenames.txt --workdir /path/to/projectdir/ --subdir seq/Run01/
+    ezfastq /path/to/fastqs/ samplenames.txt --pair-mode 2[/dim]
+"""
+    width = min(99, get_terminal_size().columns - 2)
     parser = ArgumentParser(
         description="Copy FASTQ files and use sample names to make filenames consistent",
-        formatter_class=RichHelpFormatter,
+        formatter_class=lambda prog: RawDescriptionRichHelpFormatter(prog, width=width),
+        epilog=epilog,
     )
     parser.add_argument(
         "seq_path",
@@ -52,7 +67,7 @@ def get_parser():
     parser.add_argument(
         "samples",
         nargs="+",
-        help="name of one or more samples to process; can be provided as command-line arguments or as a file with one sample name per line",
+        help="name of one or more samples to process; samples can optionally be renamed by appending a colon and new name to each sample name; alternatively, sample names can be provided as a file with one sample name per line, or two tab-separated values to rename samples",
     )
     parser.add_argument(
         "-v",

diff --git a/ezfastq/copier.py b/ezfastq/copier.py
@@ -9,6 +9,7 @@
 
 from .fastq import FastqFile
 from .map import SampleFastqMap
+from .namemap import NameMap
 from .pair import PairMode
 from dataclasses import dataclass
 from io import StringIO
@@ -35,18 +36,18 @@ class FastqCopier:
     FASTQ file names are streamlined in the process, and read pairing status is validated.
     """
 
-    sample_names: List
+    sample_name_map: NameMap
     copied_files: List
     skipped_files: List
     file_map: SampleFastqMap
     prefix: str = ""
 
     @classmethod
-    def from_dir(cls, sample_names, data_path, prefix="", pair_mode=PairMode.Unspecified):
+    def from_dir(cls, sample_name_map, data_path, prefix="", pair_mode=PairMode.Unspecified):
         copied_files = list()
         skipped_files = list()
-        file_map = SampleFastqMap.new(sample_names, data_path, pair_mode=pair_mode)
-        copier = cls(sorted(sample_names), copied_files, skipped_files, file_map, prefix)
+        file_map = SampleFastqMap.new(sample_name_map.keys(), data_path, pair_mode=pair_mode)
+        copier = cls(sample_name_map, copied_files, skipped_files, file_map, prefix)
         return copier
 
     def copy_files(self, destination):
@@ -95,7 +96,7 @@ def print_copy_log(self, outstream=sys.stderr):
 
     @property
     def length_longest_sample_name(self):
-        return max(len(sample) for sample in self.sample_names)
+        return max(len(sample) for sample in self.sample_name_map.keys())
 
     def __len__(self):
         return sum(len(fqfiles) for fqfiles in self.file_map.values())
@@ -105,7 +106,8 @@ def __iter__(self):
             for n, fqfile in enumerate(fqfiles, 1):
                 source_path = Path(fqfile).absolute()
                 read = 0 if len(fqfiles) == 1 else n
-                yield FastqFile(source_path, sample_name, read, self.prefix)
+                new_name = self.sample_name_map[sample_name]
+                yield FastqFile(source_path, new_name, read, self.prefix)
 
     def __str__(self):
         output = StringIO()

diff --git a/ezfastq/namemap.py b/ezfastq/namemap.py
@@ -0,0 +1,74 @@
+# -------------------------------------------------------------------------------------------------
+# Copyright (c) 2025, DHS. This file is part of ezfastq: https://github.com/bioforensics/ezfastq.
+#
+# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National
+# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the
+# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and
+# Development Center.
+# -------------------------------------------------------------------------------------------------
+
+
+class NameMap(dict):
+    """Mapping from original sample names to the names given to copied files.
+
+    A sample that is not renamed maps to itself. If the same original name is specified more than
+    once, the last occurrence wins.
+    """
+
+    @classmethod
+    def from_arglist(cls, arg_list):
+        """Build a name map from a list of `name` or `old_name:new_name` strings.
+
+        Raises SampleNameError if any entry is empty or contains more than one colon.
+        """
+        name_map = cls()
+        for argument in arg_list:
+            old_name, new_name = cls.parse_name(argument, sep=":")
+            name_map[old_name] = new_name
+        return name_map
+
+    @classmethod
+    def from_file(cls, path):
+        """Build a name map from a file with one sample per line.
+
+        Each line holds a sample name, optionally followed by a tab and a new name. Blank lines
+        are ignored.
+
+        Raises ValueError if the file contains no sample names, or SampleNameError (a subclass of
+        ValueError) if a line cannot be parsed.
+        """
+        name_map = cls()
+        with open(path, "r") as fh:
+            for line in fh:
+                if not line.strip():
+                    # Stray blank lines would otherwise be recorded as a sample named "".
+                    continue
+                old_name, new_name = cls.parse_name(line, sep="\t")
+                name_map[old_name] = new_name
+        if not name_map:
+            raise ValueError(f'sample name file "{path}" is empty')
+        return name_map
+
+    @staticmethod
+    def parse_name(name_string, sep=":"):
+        """Split a sample specification into an `(old_name, new_name)` tuple.
+
+        Surrounding whitespace is removed from the string and from each name. When `sep` does not
+        occur in the string, the sample keeps its name and both tuple items are equal.
+
+        Raises SampleNameError if `sep` occurs more than once or if either name is empty.
+        """
+        name_string = name_string.strip()
+        values = [value.strip() for value in name_string.split(sep)]
+        num_values = len(values)
+        if num_values > 2:
+            message = f'expected 1 or 2 values in sample name, not {num_values}: "{name_string}"'
+            raise SampleNameError(message)
+        old_name, new_name = values[0], values[-1]
+        if not old_name or not new_name:
+            raise SampleNameError(f'sample name cannot be empty: "{name_string}"')
+        return old_name, new_name
+
+
+class SampleNameError(ValueError):
+    """Raised when a sample name specification cannot be parsed."""
diff --git a/ezfastq/tests/test_copier.py b/ezfastq/tests/test_copier.py
@@ -8,8 +8,9 @@
 # -------------------------------------------------------------------------------------------------
 
 from ezfastq.copier import FastqCopier
+from ezfastq.namemap import NameMap
 from importlib.resources import files
-import pytest
+from itertools import product
 
 try:
     import tomllib
@@ -22,7 +23,7 @@
 
 
 def test_copier_basic():
-    sample_names = ["test1", "test2"]
+    sample_names = NameMap.from_arglist(["test1", "test2"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     observed = [fqfile.source_path.name for fqfile in copier]
     expected = [
@@ -36,7 +37,7 @@ def test_copier_basic():
 
 
 def test_copier_copy(tmp_path):
-    sample_names = ["test1", "test2"]
+    sample_names = NameMap.from_arglist(["test1", "test2"])
     # First pass: copy all 4
     copier1 = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     copier1.copy_files(tmp_path)
@@ -58,14 +59,14 @@ def test_copier_copy(tmp_path):
 
 
 def test_copier_prefix(tmp_path):
-    sample_names = ["test2", "test3"]
+    sample_names = NameMap.from_arglist(["test2", "test3"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1, prefix="abc_")
     copier.copy_files(tmp_path)
     assert len(list(tmp_path.glob("abc_*.fastq.gz"))) == 4
 
 
 def test_copier_str_basic(tmp_path):
-    sample_names = ["test1", "test2", "test3"]
+    sample_names = NameMap.from_arglist(["test1", "test2", "test3"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     copier.copy_files(tmp_path)
     observed = str(copier)
@@ -81,14 +82,14 @@ def test_copier_str_basic(tmp_path):
     assert observed.strip() == expected.strip()
 
 
-def test_copier_str_noop(tmp_path):
-    sample_names = ["test1", "test2", "test3"]
+def test_copier_str_noop():
+    sample_names = NameMap.from_arglist(["test1", "test2", "test3"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     assert str(copier) == ""
 
 
 def test_copier_str_allskip(tmp_path):
-    sample_names = ["test1"]
+    sample_names = NameMap.from_arglist(["test1"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     (tmp_path / "test1_R1.fastq.gz").touch()
     (tmp_path / "test1_R2.fastq.gz").touch()
@@ -105,7 +106,7 @@ def test_copier_str_allskip(tmp_path):
 
 
 def test_copier_str_mixed(tmp_path):
-    sample_names = ["test1", "test2", "test3"]
+    sample_names = NameMap.from_arglist(["test1", "test2", "test3"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_2)
     (tmp_path / "test2_R1.fastq.gz").touch()
     (tmp_path / "test2_R2.fastq.gz").touch()
@@ -128,7 +129,7 @@ def test_copier_str_mixed(tmp_path):
 
 
 def test_copier_str_roundtrip(tmp_path):
-    sample_names = ["test1", "test2", "test3"]
+    sample_names = NameMap.from_arglist(["test1", "test2", "test3"])
     copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
     (tmp_path / "test2_R1.fastq.gz").touch()
     (tmp_path / "test2_R2.fastq.gz").touch()
@@ -143,3 +144,26 @@ def test_copier_str_roundtrip(tmp_path):
     observed = copy_data["SkippedFiles"]["already_copied"]
     expected = ["test2_R1.fq.gz", "test2_R2.fq.gz"]
     assert observed == expected
+
+
+def test_copier_with_renaming(tmp_path):
+    """Renamed samples are copied under new names; the copy log maps source files to them."""
+    sample_names = NameMap.from_arglist(
+        ["test1:TestSampleA", "test2:TestSampleB", "test3:TestSampleC"]
+    )
+    copier = FastqCopier.from_dir(sample_names, SEQ_PATH_1)
+    copier.copy_files(tmp_path)
+    for sample, end in product("ABC", "12"):
+        fastq = tmp_path / f"TestSample{sample}_R{end}.fastq.gz"
+        assert fastq.is_file()
+    observed = str(copier)
+    expected = """
+[CopiedFiles]
+"test1_S1_L001_R1_001.fastq.gz" = "TestSampleA_R1.fastq.gz"
+"test1_S1_L001_R2_001.fastq.gz" = "TestSampleA_R2.fastq.gz"
+"test2_R1.fq.gz" = "TestSampleB_R1.fastq.gz"
+"test2_R2.fq.gz" = "TestSampleB_R2.fastq.gz"
+"test3-reads-r1.fastq" = "TestSampleC_R1.fastq.gz"
+"test3-reads-r2.fastq" = "TestSampleC_R2.fastq.gz"
+"""
+    assert observed.strip() == expected.strip()
diff --git a/ezfastq/tests/test_namemap.py b/ezfastq/tests/test_namemap.py
@@ -0,0 +1,68 @@
+# -------------------------------------------------------------------------------------------------
+# Copyright (c) 2025, DHS. This file is part of ezfastq: https://github.com/bioforensics/ezfastq.
+#
+# This software was prepared for the Department of Homeland Security (DHS) by the Battelle National
+# Biodefense Institute, LLC (BNBI) as part of contract HSHQDC-15-C-00064 to manage and operate the
+# National Biodefense Analysis and Countermeasures Center (NBACC), a Federally Funded Research and
+# Development Center.
+# -------------------------------------------------------------------------------------------------
+
+from ezfastq.namemap import NameMap, SampleNameError
+import pytest
+
+
+@pytest.mark.parametrize(
+    "arglist,expected",
+    [
+        (["s1", "s2", "s3"], {"s1": "s1", "s2": "s2", "s3": "s3"}),
+        (["s1:Sample1", "s2:Sample2"], {"s1": "Sample1", "s2": "Sample2"}),
+        (
+            ["1-1:99-12-005-1-1", "99-12-005-1-2", "1-3:99-12-005-1-3"],
+            {"1-1": "99-12-005-1-1", "99-12-005-1-2": "99-12-005-1-2", "1-3": "99-12-005-1-3"},
+        ),
+    ],
+)
+def test_name_map_from_arglist(arglist, expected):
+    """Arguments map to themselves unless given as `old:new` pairs; both forms can be mixed."""
+    observed = NameMap.from_arglist(arglist)
+    assert observed == expected
+
+
+def test_name_map_bad_arglist():
+    """An argument containing more than one colon raises SampleNameError."""
+    arglist = ["s1:Sample:1", "s2:Sample:2", "s3:Sample:3"]
+    message = 'expected 1 or 2 values in sample name, not 3: "s1:Sample:1"'
+    with pytest.raises(SampleNameError, match=message):
+        NameMap.from_arglist(arglist)
+
+
+def test_name_map_from_empty_arglist():
+    """An empty argument list yields an empty map rather than an error."""
+    namemap = NameMap.from_arglist([])
+    assert len(namemap) == 0
+
+
+@pytest.mark.parametrize(
+    "contents,expected",
+    [
+        ("s1\ns2\ns3", {"s1": "s1", "s2": "s2", "s3": "s3"}),
+        (
+            "1-1\t99-12-005-1-1\n1-2\t99-12-005-1-2\n1-3\t99-12-005-1-3",
+            {"1-1": "99-12-005-1-1", "1-2": "99-12-005-1-2", "1-3": "99-12-005-1-3"},
+        ),
+    ],
+)
+def test_name_map_from_file(contents, expected, tmp_path):
+    """Files may list bare names or tab-separated old/new name pairs, one sample per line."""
+    mapfile = tmp_path / "map_file.txt"
+    mapfile.write_text(contents)
+    observed = NameMap.from_file(mapfile)
+    assert observed == expected
+
+
+def test_name_map_from_empty_file(tmp_path):
+    """A file with no content raises ValueError."""
+    mapfile = tmp_path / "map_file.txt"
+    mapfile.touch()
+    with pytest.raises(ValueError, match=r"sample name file .* is empty"):
+        NameMap.from_file(mapfile)