Juke34
diff --git a/‎README.md‎
Lines changed: 4 additions & 1 deletion b/‎README.md‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎bin/drip_aggregates.py‎ ‎bin/drip.py‎bin/drip_aggregates.py renamed to bin/drip.py
Lines changed: 15 additions & 15 deletions b/‎bin/drip_aggregates.py‎ ‎bin/drip.py‎bin/drip_aggregates.py renamed to bin/drip.py
Lines changed: 15 additions & 15 deletions
@@ -274,7 +274,7 @@ This hierarchical information is provided in the same manner in the aggregate fi
 | AggregateID        | String                                                       | ID assigned after the feature under which the aggregation was done                                                                                                                 |
 | ParentType       | String                                                       | Type of the parent of the feature under which the aggregation was done                                                                                                 |
 | AggregateType    | String                                                       | Type of the features that are aggregated                                                                                                                               |
-| AggregationMode  | `all_isoforms`, `longest_isoform`, `chimaera` or `all-sites` | Way in which the aggregation was performed                                                                                                                             |
+| AggregationMode  | `all_isoforms`, `longest_isoform`, `chimaera`, `feature` or `all-sites` | Way in which the aggregation was performed                                                                                                                             |
 | CoveredSites     | Positive integer                                             | Number of sites in the aggregated features that satisfy the minimum level of coverage                                                                                  |
 | GenomeBases      | Comma-separated positive integers                            | Frequencies of the bases in the aggregated features in the reference genome (order: A, C, G, T)                                                                        |
 | SiteBasePairings | Comma-separated positive integers                            | Number of sites in which each genome-variant base pairing is found in the aggregated features (order: AA, AC, AG, AT, CA, CC, CG, CT, GA, GC, GG, GT, TA, TC, TG, TT)  |
@@ -303,6 +303,9 @@ The existence of alternative transcripts of a same gene causes some complication
 
 3. **Chimaera** (*Chimaera* in the figure): Report the counts from the union of feature ranges over all the isoforms. Its ID is composed of the ID of the gene plus "-chimaera". The aggregation types of chimaeras are postfixed with "-chimaera" as well.
 
+4. **Feature**
+Standard mode for regular features. Aggregates data from sub-features (children) of a given feature. For example, for an exon or CDS, it aggregates the counts of all its constituent elements.
+
 In the example below, a gene has three transcripts. For the **longest isoform** aggregation, Transcript 1 would be selected, because it has the greatest sum of exon lengths (numbers under the exon boxes). For the **all isoforms** aggregation, all the transcripts (1, 2, and 3) would be used. For **chimaera** aggregation, the aggregation ranges are the union of the ranges of the exons of all the transcripts. Therefore, the total length of the chimaeric features is always equal to or greater than the longest transcript.
 
 ![alt text](doc/img/aggregation_modes.png)
 
@@ -10,7 +10,7 @@ def print_help():
 DRIP - RNA Editing Analysis Tool
 
 DESCRIPTION:
-    This script analyzes RNA editing from RAIN aggregate files. It calculates
+    This script analyzes RNA editing from standardized pluviometer files. It calculates
     two key metrics for all 16 genome-variant base pair combinations across multiple 
     samples and combines them into a unified matrix format.
 
@@ -35,7 +35,7 @@ def print_help():
                         (order: AA, AC, AG, AT, CA, CC, CG, CT, GA, GC, GG, GT, TA, TC, TG, TT)
 
 CALCULATED METRICS:
-    For each aggregate feature, the script calculates metrics for all 16 base pair combinations:
+    For each line, the script calculates metrics for all 16 base pair combinations:
     
     For each combination XY (where X = genome base, Y = read base):
     
@@ -61,10 +61,11 @@ def print_help():
     Metadata columns (first 6 columns):
     - SeqID: Sequence/chromosome identifier
     - ParentIDs: Parent feature identifiers
-    - AggregateID: Unique aggregate identifier
-    - ParentType: Type of parent feature
-    - AggregateType: Type of aggregate feature
-    - AggregationMode: Mode of aggregation used
+    - ID: Unique identifier
+    - Ptype: Type of Parent feature
+    - Type: Type of feature
+    - Ctype: Type of Children feature
+    - Mode: Mode of aggregation used if any (e.g., 'all_sites', 'edited_sites', 'edited_reads')
     
     Metric columns (for each sample):
     - GROUP::SAMPLE::REPLICATE::espf: XY sites proportion in feature (XY sites / X bases)
@@ -94,7 +95,7 @@ def print_help():
     - results_TA.tsv, results_TC.tsv, results_TG.tsv, results_TT.tsv
     
     Each file has columns:
-    SeqID, ParentIDs, AggregateID, ParentType, AggregateType, AggregationMode,
+    SeqID, ParentIDs, ID, Ptype, Ctype, Mode,
     control::sample1::rep1::rain_sample1::espf, control::sample1::rep1::rain_sample1::espr,
     control::sample2::rep2::rain_sample2::espf, control::sample2::rep2::rain_sample2::espr,
     treated::sample1::rep1::rain_sample3::espf, treated::sample1::rep1::rain_sample3::espr
@@ -118,9 +119,8 @@ def parse_tsv_file(filepath, group_name, sample_name, replicate, file_id, includ
     """Parse a single TSV file and extract editing metrics for all base pair combinations."""
     df = pd.read_csv(filepath, sep='\t')
 
-    # DO NOT filter out rows where AggregateID is '.' 
+    # DO NOT filter out rows where ID is '.' 
     # These are special aggregate rows (e.g., all_sites) that should be kept
-    
     # Base pair combinations in order
     base_pairs = ['AA', 'AC', 'AG', 'AT', 'CA', 'CC', 'CG', 'CT', 
                   'GA', 'GC', 'GG', 'GT', 'TA', 'TC', 'TG', 'TT']
@@ -129,7 +129,7 @@ def parse_tsv_file(filepath, group_name, sample_name, replicate, file_id, includ
     # Parse GenomeBases (order: A, C, G, T)
     for i, base in enumerate(bases):
         df[f'{base}_count'] = df['GenomeBases'].str.split(',').str[i].astype(int)
-    
+
     # Parse SiteBasePairings (all 16 combinations)
     for i, bp in enumerate(base_pairs):
         df[f'{bp}_sites'] = df['SiteBasePairings'].str.split(',').str[i].astype(int)
@@ -139,7 +139,7 @@ def parse_tsv_file(filepath, group_name, sample_name, replicate, file_id, includ
         df[f'{bp}_reads'] = df['ReadBasePairings'].str.split(',').str[i].astype(int)
 
     # Calculate metrics for each base pair combination
-    metadata_cols = ['SeqID', 'ParentIDs', 'AggregateID', 'ParentType', 'AggregateType', 'AggregationMode']
+    metadata_cols = ['SeqID', 'ParentIDs', 'ID', 'Mtype', 'Ptype', 'Type', 'Ctype', 'Mode', 'Start', 'End', 'Strand']
     result_cols = metadata_cols.copy()
 
     # Create column prefix with group::sample::replicate::file_id or group::sample::replicate
@@ -210,18 +210,18 @@ def merge_samples(file_group_sample_replicate_dict, output_prefix, include_file_
         replicate_list.append(replicate)
 
     # Merge all samples based on metadata columns
-    metadata_cols = ['SeqID', 'ParentIDs', 'AggregateID', 'ParentType', 'AggregateType', 'AggregationMode']
+    metadata_cols = ['SeqID', 'ParentIDs', 'ID', 'Mtype', 'Ptype', 'Type', 'Ctype', 'Mode', 'Start', 'End', 'Strand']
     merged = all_data[0]
     for data in all_data[1:]:
         merged = merged.merge(data, on=metadata_cols, how='outer')
 
     # Fill NA values with 0 for metrics
     merged = merged.fillna(0)
 
-    # Sort by SeqID, then ParentIDs, then AggregationMode
-    merged = merged.sort_values(['SeqID', 'ParentIDs', 'AggregationMode'])
+    # Sort by SeqID, then ParentIDs, then Mode
+    merged = merged.sort_values(['SeqID', 'ParentIDs', 'Mode'])
 
-    metadata_cols = ['SeqID', 'ParentIDs', 'AggregateID', 'ParentType', 'AggregateType', 'AggregationMode']
+    metadata_cols = ['SeqID', 'ParentIDs', 'ID', 'Mtype', 'Ptype', 'Type', 'Ctype', 'Mode', 'Start', 'End', 'Strand']
 
     # Create one file per base pair combination
     output_files = []