Skip to content

Commit 75ecf4e

Browse files
ameynert and claude committed
test: add tests for rewrite_fasta
Adds tests/tools/test_rewrite_fasta.py covering rewrite_fasta() using pytest's tmp_path fixture:

- Canonical autosomes (chr1, chr22) are kept
- Sex chromosomes (chrX, chrY) and chrMT are kept
- Alt contigs (chr1_alt) and decoys (chrUn_gl000220, chrEBV) are filtered
- Mixed input with canonical and non-canonical contigs interleaved
- Empty input produces empty output
- Multi-line sequences are written in full

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
1 parent 208b3ec commit 75ecf4e

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
"""Tests for the rewrite_fasta tool."""
2+
3+
from pathlib import Path
4+
5+
from divref.tools.rewrite_fasta import rewrite_fasta
6+
7+
8+
def _write_fasta(path: Path, contigs: list[tuple[str, str]]) -> None:
    """Write a minimal FASTA file containing one record per contig.

    Each ``(name, seq)`` pair is emitted as a ``>name`` header line followed
    by a single sequence line. An empty ``contigs`` list yields an empty file.

    Args:
        path: Destination file; created or truncated.
        contigs: ``(name, sequence)`` pairs, written in the given order.
    """
    # Pin the encoding so the fixture does not depend on the platform's
    # locale-derived default text encoding.
    with path.open("w", encoding="utf-8") as f:
        f.writelines(f">{name}\n{seq}\n" for name, seq in contigs)
12+
13+
14+
def test_keeps_canonical_autosomes(tmp_path: Path) -> None:
    """chr1 and chr22 records should appear unchanged in the rewritten file."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    _write_fasta(source, [("chr1", "ACGT"), ("chr22", "TTTT")])

    rewrite_fasta(fasta_path=source, output_path=target)

    expected = ">chr1\nACGT\n>chr22\nTTTT\n"
    assert target.read_text() == expected
18+
19+
20+
def test_keeps_sex_and_mt_chromosomes(tmp_path: Path) -> None:
    """chrX, chrY and chrMT records should all be retained, in input order."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    records = [("chrX", "AAAA"), ("chrY", "CCCC"), ("chrMT", "GGGG")]
    _write_fasta(source, records)

    rewrite_fasta(fasta_path=source, output_path=target)

    expected = ">chrX\nAAAA\n>chrY\nCCCC\n>chrMT\nGGGG\n"
    assert target.read_text() == expected
24+
25+
26+
def test_filters_non_canonical(tmp_path: Path) -> None:
    """chr1_alt and chrUn_gl000220 should be dropped, leaving only chr1."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    records = [("chr1", "ACGT"), ("chr1_alt", "AAAA"), ("chrUn_gl000220", "TTTT")]
    _write_fasta(source, records)

    rewrite_fasta(fasta_path=source, output_path=target)

    expected = ">chr1\nACGT\n"
    assert target.read_text() == expected
33+
34+
35+
def test_mixed_canonical_and_non_canonical(tmp_path: Path) -> None:
    """With kept and dropped contigs interleaved, only chr1 and chr2 survive."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    records = [
        ("chr1", "ACGT"),
        ("chr1_alt", "AAAA"),
        ("chr2", "TTTT"),
        ("chrEBV", "CCCC"),
    ]
    _write_fasta(source, records)

    rewrite_fasta(fasta_path=source, output_path=target)

    expected = ">chr1\nACGT\n>chr2\nTTTT\n"
    assert target.read_text() == expected
42+
43+
44+
def test_empty_fasta(tmp_path: Path) -> None:
    """An empty input file should produce an empty output file."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    source.write_text("")

    rewrite_fasta(fasta_path=source, output_path=target)

    assert target.read_text() == ""
48+
49+
50+
def test_multiline_sequence_preserved(tmp_path: Path) -> None:
    """Both sequence lines of chr1 should be written; chr1_alt is dropped."""
    source = tmp_path / "in.fa"
    target = tmp_path / "out.fa"
    source.write_text(">chr1\nACGT\nTGCA\n>chr1_alt\nAAAA\n")

    rewrite_fasta(fasta_path=source, output_path=target)

    expected = ">chr1\nACGT\nTGCA\n"
    assert target.read_text() == expected

0 commit comments

Comments
 (0)