diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c47e8c9..5e8fdc5 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -18,8 +18,7 @@ jobs: - name: Install run: | python -m pip install --upgrade pip - pip install . - pip install --group dev + pip install .[dev] - name: Test with pytest run: make test - name: Style check diff --git a/.gitignore b/.gitignore index 02890a4..529b13a 100644 --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,4 @@ __pycache__ sandbox/ dist/ +.vscode/ diff --git a/CHANGELOG.md b/CHANGELOG.md index 480763c..9997c53 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [0.1.2] 2025-11-05 + +### Changed +- User can now configure subdirectory structure for copy destination (#4) + + ## [0.1.1] 2025-05-30 ### Changed diff --git a/ezfastq/api.py b/ezfastq/api.py index 6c0d354..ad95aa2 100644 --- a/ezfastq/api.py +++ b/ezfastq/api.py @@ -12,12 +12,19 @@ from pathlib import Path -def copy(sample_names, seq_path, pair_mode=PairMode.Unspecified, prefix="", workdir=Path(".")): +def copy( + sample_names, + seq_path, + pair_mode=PairMode.Unspecified, + prefix="", + workdir=Path("."), + subdir="seq", +): copier = FastqCopier.from_dir(sample_names, seq_path, prefix=prefix, pair_mode=pair_mode) - copier.copy_files(workdir / "seq") + copier.copy_files(workdir / subdir) copier.print_copy_log() - nlogs = len(list((workdir / "seq").glob("copy-log-*.toml"))) - with open(workdir / "seq" / f"copy-log-{nlogs + 1}.toml", "w") as fh: + nlogs = len(list((workdir / subdir).glob("copy-log-*.toml"))) + with open(workdir / subdir / f"copy-log-{nlogs + 1}.toml", "w") as fh: print(copier, file=fh) added_samples = set(fastq.sample for fastq in copier.copied_files) added_samples = sorted(added_samples) diff --git a/ezfastq/cli.py b/ezfastq/cli.py index 8093550..77045c2 100644 --- a/ezfastq/cli.py +++ b/ezfastq/cli.py @@ -12,6 +12,7 @@ from argparse import ArgumentParser from importlib.metadata import version from pathlib import Path +from rich_argparse import RichHelpFormatter def main(arglist=None): @@ -22,11 +23,12 @@ def main(arglist=None): pair_mode=args.pair_mode, prefix=args.prefix, workdir=args.workdir, + subdir=args.subdir, ) def parse_args(arglist=None): - if arglist: + if arglist: # pragma: no cover arglist = map(str, arglist) args = get_parser().parse_args(arglist) samples_file = Path(args.samples[0]) @@ -38,7 +40,10 @@ def parse_args(arglist=None): def get_parser(): - parser = ArgumentParser(description="Copy FASTQ files and update sample names") + parser = ArgumentParser( + description="Copy FASTQ files and use sample names to make filenames consistent", + formatter_class=RichHelpFormatter, + ) parser.add_argument( "seq_path", help="path to directory containing sequences in FASTQ format; subdirectories will be searched recursively", @@ -57,10 +62,17 @@ def get_parser(): parser.add_argument( "-w", "--workdir", - metavar="WD", + metavar="PATH", type=Path, default=Path("."), - help="directory to which input files will be copied and renamed", + help="project directory to which input files will be copied and renamed; current directory is used by default", + ) + parser.add_argument( + "-s", + "--subdir", + metavar="PATH", + default="seq", + help="subdirectory path under --workdir to which sequence files will be written; PATH=`seq` by default, but can contain nesting (e.g. `seq/study`)", ) parser.add_argument( "-p", diff --git a/ezfastq/tests/test_cli.py b/ezfastq/tests/test_cli.py index c509add..4a877cf 100644 --- a/ezfastq/tests/test_cli.py +++ b/ezfastq/tests/test_cli.py @@ -30,6 +30,15 @@ def test_copy(tmp_path): assert "SkippedFiles" not in log_data +def test_copy_subdir(tmp_path): + seq_path = files("ezfastq") / "tests" / "data" / "flat" + arglist = [seq_path, "test1", "test2", "--workdir", tmp_path, "--subdir", "seq/PROJa/RUNb"] + cli.main(arglist) + rundir = tmp_path / "seq" / "PROJa" / "RUNb" + assert rundir.is_dir() + assert len(list(rundir.glob("*_R?.fastq.gz"))) == 4 + + def test_copy_sample_names_file(tmp_path): sample_names_file = tmp_path / "sample-names.txt" sample_names_file.write_text("test1\ntest3\ntest2\n") diff --git a/pyproject.toml b/pyproject.toml index a7f24e3..54edb23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,22 +1,19 @@ [project] name = "ezfastq" -version = "0.1.1" +version = "0.1.2" description = "Scan directories for FASTQ files and associate with sample names" readme = "README.md" requires-python = ">=3.10" dependencies = [ "rich", + "rich_argparse", ] authors = [ {name = "Daniel Standage", email = "daniel.standage@st.dhs.gov"}, {name = "Ryan Berger", email = "ryan.berger@st.dhs.gov"}, ] -[build-system] -requires = ["setuptools", "setuptools-scm"] -build-backend = "setuptools.build_meta" - -[dependency-groups] +[project.optional-dependencies] dev = [ "black==25.1", "pytest", @@ -26,5 +23,9 @@ dev = [ [project.scripts] ezfastq = "ezfastq.cli:main" +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + [tool.setuptools] packages = ["ezfastq"]