11"""Tests for shared Hail utilities in haplotype.py."""
22
3+ from pathlib import Path
34from typing import Any
45
6+ import hail as hl
57import pytest
68
79from divref .haplotype import get_haplo_sequence
810from divref .haplotype import split_haplotypes
911from divref .haplotype import to_hashable_items
1012from divref .haplotype import variant_distance
1113
12- hl = pytest .importorskip ("hail" )
14+ # ---------------------------------------------------------------------------
15+ # Helper functions
16+ # ---------------------------------------------------------------------------
17+
18+
def _make_variant(position: int, ref: str, alt: str) -> Any:
    """Build a Hail struct describing one variant on contig ``chr1``.

    The result has a ``locus`` struct (``contig`` and ``position``) and an
    ``alleles`` list holding ``ref`` followed by ``alt``.
    """
    locus = hl.Struct(contig="chr1", position=position)
    alleles = [ref, alt]
    return hl.Struct(locus=locus, alleles=alleles)
21+
22+
def _make_haplotype_table(variant_positions: list[tuple[int, str, str]]) -> Any:
    """Build a single-row Hail table from ``(position, ref, alt)`` tuples.

    The one row carries three parallel arrays of equal length:

    * ``variants``: structs with ``locus.position`` and ``alleles``.
    * ``haplotype``: the stringified index of each variant ("0", "1", ...).
    * ``gnomad_freqs``: the constant ``0.1`` for every variant.
    """
    # Row schema, built from the innermost type outwards.
    locus_type = hl.tstruct(position=hl.tint32)
    variant_type = hl.tstruct(locus=locus_type, alleles=hl.tarray(hl.tstr))
    row_type = hl.tstruct(
        variants=hl.tarray(variant_type),
        haplotype=hl.tarray(hl.tstr),
        gnomad_freqs=hl.tarray(hl.tfloat64),
    )

    variants = [
        {"locus": {"position": pos}, "alleles": [ref, alt]}
        for pos, ref, alt in variant_positions
    ]
    count = len(variants)
    row = {
        "variants": variants,
        "haplotype": [str(index) for index in range(count)],
        "gnomad_freqs": [0.1] * count,
    }
    return hl.Table.parallelize([row], schema=row_type)
1343
1444
1545# ---------------------------------------------------------------------------
1646# get_haplo_sequence
1747# ---------------------------------------------------------------------------
1848
1949
def test_get_haplo_sequence_single(
    datadir: Path,
    hail_reference_genome: Any,
    hail_context: None,  # noqa: ARG001
) -> None:
    """get_haplo_sequence should accept a single variant and yield a sequence."""
    # Register the test FASTA and its index with the reference genome fixture.
    fasta_path = str(datadir / "test.fa")
    index_path = str(datadir / "test.fa.fai")
    hail_reference_genome.add_sequence(fasta_path, index_path)

    snp = _make_variant(position=100, ref="A", alt="C")
    sequence = get_haplo_sequence(
        context_size=2,
        variants=[snp],
        reference_genome=hail_reference_genome.name,
    )
    # NOTE(review): the result is only printed, never asserted, so this test
    # passes as long as the calls above do not raise. Consider asserting the
    # expected sequence once it is confirmed against test.fa.
    print(hl.str(sequence))
66+
67+
2068def test_get_haplo_sequence_empty_list_raises () -> None :
2169 """get_haplo_sequence should raise ValueError when given an empty list."""
2270 with pytest .raises (ValueError , match = "at least one variant" ):
@@ -51,10 +99,6 @@ def test_to_hashable_items_sorted_by_key() -> None:
5199# ---------------------------------------------------------------------------
52100
53101
def _make_variant(position: int, ref: str, alt: str) -> Any:
    """Build a Hail struct for one variant with a position-only locus.

    The result has a ``locus`` struct holding ``position`` and an ``alleles``
    list holding ``ref`` followed by ``alt``.
    """
    locus = hl.Struct(position=position)
    alleles = [ref, alt]
    return hl.Struct(locus=locus, alleles=alleles)
56-
57-
58102def test_variant_distance_adjacent_snps (hail_context : None ) -> None : # noqa: ARG001
59103 # SNP at 100, next SNP at 101: distance = 101 - 100 - len("A") = 0
60104 assert (
@@ -81,28 +125,6 @@ def test_variant_distance_deletion_closes_gap(hail_context: None) -> None: # no
81125# ---------------------------------------------------------------------------
82126
83127
def _make_haplotype_table(variant_positions: list[tuple[int, str, str]]) -> Any:
    """Create a one-row Hail table for the given ``(position, ref, alt)`` tuples.

    The row holds a ``variants`` array (``locus.position`` plus ``alleles``),
    a ``haplotype`` array of stringified variant indices, and a
    ``gnomad_freqs`` array filled with ``0.1``.
    """
    # Describe the row layout first so the literal data below can be checked
    # against it by Hail.
    position_only_locus = hl.tstruct(position=hl.tint32)
    variant_type = hl.tstruct(locus=position_only_locus, alleles=hl.tarray(hl.tstr))
    row_type = hl.tstruct(
        variants=hl.tarray(variant_type),
        haplotype=hl.tarray(hl.tstr),
        gnomad_freqs=hl.tarray(hl.tfloat64),
    )

    variants = [
        {"locus": {"position": pos}, "alleles": [ref, alt]}
        for pos, ref, alt in variant_positions
    ]
    n_variants = len(variants)
    single_row = {
        "variants": variants,
        "haplotype": [str(i) for i in range(n_variants)],
        "gnomad_freqs": [0.1] * n_variants,
    }
    return hl.Table.parallelize([single_row], schema=row_type)
104-
105-
106128def test_split_haplotypes_no_split_needed (hail_context : None ) -> None : # noqa: ARG001
107129 # All variants within window_size=200; haplotype is kept intact as one row
108130 ht = _make_haplotype_table ([(100 , "A" , "T" ), (150 , "C" , "G" ), (190 , "G" , "A" )])
0 commit comments