@@ -88,10 +88,13 @@ def parse_cli_input() -> argparse.Namespace:
8888def write_output_file_header (handle : TextIO ) -> int :
8989 return handle .write (
9090 "SeqID\t GrandParentID\t ParentID\t FeatureID\t Type\t Start\t End\t Strand\t CoveredSites"
91- + "\t SiteFreqs["
91+ + "\t GenomeBases["
92+ + "," .join (BASE_TYPES )
93+ + "]"
94+ + "\t SiteBasePairs["
9295 + "," .join (MATCH_MISMATCH_TYPES )
9396 + "]"
94- + "\t ReadFreqs ["
97+ + "\t ReadBasePairs ["
9598 + "," .join (MATCH_MISMATCH_TYPES )
9699 + "]"
97100 + "\n "
@@ -102,7 +105,7 @@ class SiteFilter:
102105 def __init__ (self , cov_threshold : int , edit_threshold : int ) -> None :
103106 self .cov_threshold : int = cov_threshold
104107 self .edit_threshold : int = edit_threshold
105- self .frequencies : NDArray [np .int32 ] = np .zeros (4 , np .int32 )
108+ self .frequencies : NDArray [np .int32 ] = np .zeros (5 , np .int32 )
106109
107110 def apply (self , variant_data : SiteVariantData ) -> None :
108111 if variant_data .coverage >= self .cov_threshold :
@@ -127,9 +130,11 @@ def __init__(self, site_filter: SiteFilter) -> None:
127130 Rows and column indices correspond to bases in alphabetic order (ACGT)
128131 Row-columns corresponding to the same base (e.g. (0,0) -> (A,A)) do not represent edits, and should remain 0
129132 """
130- self .edit_read_freqs : NDArray [np .int32 ] = np .zeros ((4 , 4 ), dtype = np .int32 )
133+ self .edit_read_freqs : NDArray [np .int32 ] = np .zeros ((5 , 5 ), dtype = np .int32 )
134+
135+ self .edit_site_freqs : NDArray [np .int32 ] = np .zeros ((5 , 5 ), dtype = np .int32 )
131136
132- self .edit_site_freqs : NDArray [np .int32 ] = np .zeros (( 4 , 4 ) , dtype = np .int32 )
137+ self .genome_base_freqs : NDArray [np .int32 ] = np .zeros (5 , dtype = np .int32 )
133138
134139 self .filter = site_filter
135140
@@ -144,13 +149,19 @@ def update(self, variant_data: SiteVariantData) -> None:
144149 self .filter .apply (variant_data )
145150 self .edit_site_freqs [i , :] += self .filter .frequencies
146151
152+ self .genome_base_freqs [i ] += 1
153+
147154 return None
148155
149156 def report (self , output_handle : TextIO ) -> int :
150157 b = 0
151158
152159 # Write the number of covered sites
153- b += output_handle .write (str (self .edit_site_freqs .sum ()))
160+ b += output_handle .write (str (self .genome_base_freqs .sum ()))
161+ b += output_handle .write ("\t " )
162+
163+ # Write the base frequencies in the genome
164+ b += write_base_array (output_handle , self .genome_base_freqs )
154165 b += output_handle .write ("\t " )
155166
156167 # Write edited sites
0 commit comments