From 539ce2e04c084741568cf6c57c01aa5f2bce233e Mon Sep 17 00:00:00 2001
From: Daniel Standage
Date: Fri, 12 Dec 2025 11:23:22 -0500
Subject: [PATCH 1/2] Checksums to verify file copies [skip ci]

---
 ezfastq/fastq.py | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/ezfastq/fastq.py b/ezfastq/fastq.py
index f155a0d..27faa79 100644
--- a/ezfastq/fastq.py
+++ b/ezfastq/fastq.py
@@ -8,6 +8,7 @@
 # -------------------------------------------------------------------------------------------------
 
 from dataclasses import dataclass
+from hashlib import sha256
 from pathlib import Path
 from shutil import copy
 from subprocess import run
@@ -42,6 +43,8 @@ def copy(self, destination):
         destination.mkdir(parents=True, exist_ok=True)
         file_copy = destination / self._working_name
         copy(self.source_path, file_copy)
+        if file_sha256(self.source_path) != file_sha256(file_copy):  # pragma: no cover
+            raise CopyError(f"checksum failed for {self.source_path}")
         if self.extension == "fastq":
             run(["gzip", str(file_copy)])
 
@@ -71,5 +74,17 @@ def _working_name(self):
         return f"{self.stem}.{self.extension}"
 
 
+def file_sha256(path, block_size=65536):
+    sha = sha256()
+    with open(path, "rb") as fh:
+        for block in iter(lambda: fh.read(block_size), b""):
+            sha.update(block)
+    return sha.hexdigest()
+
+
+class CopyError(RuntimeError):
+    pass
+
+
 class LinkError(ValueError):
     pass

From 83badfed1f15e7f3a9683df8ade2b3a451bd3ec0 Mon Sep 17 00:00:00 2001
From: Daniel Standage
Date: Fri, 12 Dec 2025 11:26:25 -0500
Subject: [PATCH 2/2] Update change log

---
 CHANGELOG.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4ac94fd..9c55890 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ### Added
 - Support for renaming samples while copying (#6)
 - Support for symbolic linking instead of copying (#9)
+- SHA256 checksums to ensure integrity of copied files (#10)
 
 ## [0.1.3] 2025-12-03
 