Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ classifiers = [
"License :: OSI Approved :: MIT License",
"Operating System :: OS Independent",
]
version = "0.0.4"
version = "0.0.5"

[tool.maturin]
module-name = "ruranges"
Expand Down
1 change: 1 addition & 0 deletions src/complement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,6 @@ pub fn sweep_line_non_overlaps<G: GroupType, T: PositionType>(
}
}

radsort::sort(&mut no_overlaps);
no_overlaps
}
26 changes: 17 additions & 9 deletions src/group_cumsum.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
use radsort::sort_by_key;

use crate::{ruranges_structs::{GroupType, PositionType}, sorts::build_subsequence_intervals};
use crate::{ruranges_structs::{GroupType, MinInterval, PositionType}, sorts::build_subsequence_intervals};


pub fn sweep_line_cumsum<G, T>(
Expand All @@ -17,12 +17,10 @@ where

sort_by_key(&mut ivals, |iv| (iv.chr, iv.start));

let mut idx_out = Vec::with_capacity(chrs.len());
let mut cumsum_start = Vec::with_capacity(chrs.len());
let mut cumsum_end = Vec::with_capacity(chrs.len());
let mut results= Vec::with_capacity(chrs.len());

if ivals.is_empty() {
return (idx_out, cumsum_start, cumsum_end);
return (Vec::with_capacity(chrs.len()),Vec::with_capacity(chrs.len()), Vec::with_capacity(chrs.len()));
}

let mut current_chr = ivals[0].chr;
Expand All @@ -39,11 +37,21 @@ where
let s = running_total;
let e = running_total + len;

idx_out.push(iv.idx);
cumsum_start.push(s);
cumsum_end.push(e);
results.push(MinInterval {idx: iv.idx, start: s, end: e});
running_total = e;
}

(idx_out, cumsum_start, cumsum_end)
sort_by_key(&mut results, |i| i.idx);

let mut out_idxs = Vec::with_capacity(results.len());
let mut out_starts = Vec::with_capacity(results.len());
let mut out_ends = Vec::with_capacity(results.len());

for rec in results {
out_idxs.push(rec.idx);
out_starts.push(rec.start);
out_ends.push(rec.end);
}

(out_idxs, out_starts, out_ends)
}
29 changes: 19 additions & 10 deletions src/map_to_global.rs
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@

use numpy::{IntoPyArray, PyArray1, PyReadonlyArray1};
use pyo3::prelude::*;
use radsort::sort_by_key;

use crate::ruranges_structs::{GroupType, PositionType};
use crate::ruranges_structs::{GroupType, PositionType, StrandInterval};


#[allow(clippy::too_many_arguments)]
Expand Down Expand Up @@ -37,10 +38,7 @@ pub fn map_to_global<G: GroupType, T: PositionType>(
debug_assert_eq!(q_tx.len(), q_fwd.len());

// ------------------- output buffers -----------------------------------
let mut out_idx: Vec<u32> = Vec::new();
let mut out_start = Vec::new();
let mut out_end = Vec::new();
let mut out_fwd = Vec::new();
let mut results = Vec::new();

// ------------------- two-pointer sweep ---------------------------------
let mut ei = 0usize; // exon pointer
Expand Down Expand Up @@ -110,10 +108,7 @@ pub fn map_to_global<G: GroupType, T: PositionType>(
};

// push result
out_idx .push(idx);
out_start.push(g_start);
out_end .push(g_end);
out_fwd .push(local_f == ex_fwd[ek]);
results.push(StrandInterval {start: g_start, end: g_end, idx: idx, fwd: local_f == ex_fwd[ek]});

// advance inside query
l = seg_end_local;
Expand All @@ -132,5 +127,19 @@ pub fn map_to_global<G: GroupType, T: PositionType>(
}
}

(out_idx, out_start, out_end, out_fwd)
sort_by_key(&mut results, |i| i.idx);

let mut out_idxs = Vec::with_capacity(results.len());
let mut out_starts = Vec::with_capacity(results.len());
let mut out_ends = Vec::with_capacity(results.len());
let mut out_strands = Vec::with_capacity(results.len());

for rec in results {
out_idxs.push(rec.idx);
out_starts.push(rec.start);
out_ends.push(rec.end);
out_strands.push(rec.fwd);
}

(out_idxs, out_starts, out_ends, out_strands)
}
34 changes: 19 additions & 15 deletions src/nearest.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use std::{str::FromStr, time::Instant};

use radsort::sort_by_key;

use crate::{
overlaps::{self, sweep_line_overlaps, sweep_line_overlaps_overlap_pair},
ruranges_structs::{GroupType, MinEvent, Nearest, OverlapPair, PositionType},
Expand Down Expand Up @@ -265,9 +267,7 @@ pub fn merge_three_way_by_index_distance<T: PositionType>(
) -> (Vec<u32>, Vec<u32>, Vec<T>) {
// We'll return tuples: (idx, idx2, distance).
// You can adapt if you want a custom struct instead.
let mut idxs1 = Vec::new();
let mut idxs2 = Vec::new();
let mut distance = Vec::new();
let mut results = Vec::new();

// Pointers over each input
let (mut i, mut j, mut r) = (0_usize, 0_usize, 0_usize);
Expand Down Expand Up @@ -376,9 +376,7 @@ pub fn merge_three_way_by_index_distance<T: PositionType>(
}
// Add to result
let OverlapPair { idx, idx2 } = overlaps_slice[oi];
idxs1.push(idx);
idxs2.push(idx2);
distance.push(dcur);
results.push(Nearest { idx: idx, idx2: idx2, distance: T::zero() });
oi += 1;
} else {
break;
Expand All @@ -399,10 +397,7 @@ pub fn merge_three_way_by_index_distance<T: PositionType>(
}
used_distances.insert(dcur);
}
let item = &left_slice[lj];
idxs1.push(item.idx);
idxs2.push(item.idx2);
distance.push(dcur);
results.push(left_slice[lj]);
lj += 1;
} else {
break;
Expand All @@ -423,10 +418,7 @@ pub fn merge_three_way_by_index_distance<T: PositionType>(
}
used_distances.insert(dcur);
}
let item = &right_slice[rr];
idxs1.push(item.idx);
idxs2.push(item.idx2);
distance.push(dcur);
results.push(right_slice[rr]);
rr += 1;
} else {
break;
Expand All @@ -439,5 +431,17 @@ pub fn merge_three_way_by_index_distance<T: PositionType>(
// done collecting up to k distinct distances for this index
}

(idxs1, idxs2, distance)
sort_by_key(&mut results, |n| (n.idx, n.distance, n.idx2));

let mut out_idxs = Vec::with_capacity(results.len());
let mut out_idxs2 = Vec::with_capacity(results.len());
let mut out_distances = Vec::with_capacity(results.len());

for rec in results {
out_idxs.push(rec.idx);
out_idxs2.push(rec.idx2);
out_distances.push(rec.distance);
}

(out_idxs, out_idxs2, out_distances)
}
49 changes: 28 additions & 21 deletions src/overlaps.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
use std::str::FromStr;
use std::time::{Duration, Instant};

use radsort::sort_by_key;
use rustc_hash::{FxHashMap, FxHashSet};

use crate::helpers::keep_first_by_idx;
use crate::ruranges_structs::{GroupType, MaxEvent, MinEvent, OverlapPair, OverlapType, PositionType};
use crate::sorts::{
self, build_sorted_events_single_collection_separate_outputs,
build_sorted_maxevents_with_starts_ends,
self, build_sorted_events_single_collection_separate_outputs, build_sorted_maxevents_with_starts_ends
};

/// Perform a four-way merge sweep to find cross overlaps.
Expand Down Expand Up @@ -43,7 +44,7 @@ pub fn overlaps<C: GroupType, T: PositionType>(
let overlap_type = OverlapType::from_str(overlap_type).unwrap();
let invert = overlap_type == OverlapType::Last;

if overlap_type == OverlapType::All && !contained {
let mut result_pairs = if overlap_type == OverlapType::All && !contained {
// The common, super-optimized case
sweep_line_overlaps(
chrs,
Expand Down Expand Up @@ -73,7 +74,7 @@ pub fn overlaps<C: GroupType, T: PositionType>(
&sorted_ends2,
);
keep_first_by_idx(&mut pairs);
pairs.into_iter().map(|pair| (pair.idx, pair.idx2)).unzip()
pairs
} else {
let maxevents = compute_sorted_maxevents(
chrs,
Expand All @@ -87,13 +88,15 @@ pub fn overlaps<C: GroupType, T: PositionType>(
);
let mut pairs = sweep_line_overlaps_containment(maxevents);
if overlap_type == OverlapType::All {
pairs.into_iter().map(|pair| (pair.idx, pair.idx2)).unzip()
pairs
} else {
keep_first_by_idx(&mut pairs);
pairs.into_iter().map(|pair| (pair.idx, pair.idx2)).unzip()
pairs
}
}
}
};
sort_by_key(&mut result_pairs, |p| p.idx);
result_pairs.into_iter().map(|pair| (pair.idx, pair.idx2)).unzip()
}

pub fn sweep_line_overlaps_set1<C: GroupType, T: PositionType>(
Expand Down Expand Up @@ -479,6 +482,7 @@ pub fn compute_sorted_maxevents<C: GroupType, T: PositionType>(
}
}


pub fn sweep_line_overlaps<C: GroupType, T: PositionType>(
chrs: &[C],
starts: &[T],
Expand All @@ -487,19 +491,19 @@ pub fn sweep_line_overlaps<C: GroupType, T: PositionType>(
starts2: &[T],
ends2: &[T],
slack: T,
) -> (Vec<u32>, Vec<u32>) {
) -> (Vec<OverlapPair>) {
// We'll collect all cross overlaps here
let mut overlaps = Vec::new();
let mut overlaps2 = Vec::new();

if chrs.is_empty() | chrs2.is_empty() {
return (overlaps, overlaps2);
let events = sorts::build_sorted_events(chrs, starts, ends, chrs2, starts2, ends2, slack);

if events.is_empty() {
return overlaps;
};

let events = sorts::build_sorted_events(chrs, starts, ends, chrs2, starts2, ends2, slack);
// Active sets
let mut active1 = FxHashSet::default();
let mut active2 = FxHashSet::default();
let mut active2 =FxHashSet::default();

let mut current_chr = events.first().unwrap().chr;

Expand All @@ -516,18 +520,21 @@ pub fn sweep_line_overlaps<C: GroupType, T: PositionType>(
if e.first_set {
// Overlaps with all currently active intervals in set2
for &idx2 in active2.iter() {
overlaps.push(e.idx);
overlaps2.push(idx2);
overlaps.push(OverlapPair {
idx: e.idx,
idx2: idx2,
});
}
// Now add it to active1
active1.insert(e.idx);
} else {
// Overlaps with all currently active intervals in set1
for &idx1 in active1.iter() {
overlaps.push(idx1);
overlaps2.push(e.idx);
}
// Now add it to active2
for &idx in active1.iter() {
overlaps.push(OverlapPair {
idx: idx,
idx2: e.idx,
});
};
active2.insert(e.idx);
}
} else {
Expand All @@ -540,5 +547,5 @@ pub fn sweep_line_overlaps<C: GroupType, T: PositionType>(
}
}

(overlaps, overlaps2)
overlaps
}
25 changes: 24 additions & 1 deletion src/ruranges_structs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,21 @@ pub struct GenomicData<C: GroupType, P: PositionType> {
pub strands: Option<Vec<bool>>,
}

/// Minimal interval record: a `start`/`end` position pair tagged with a `u32` index.
///
/// The cumulative-sum sweep collects one of these per interval and sorts the
/// collected records by `idx` before unpacking them into separate output vectors.
///
/// NOTE(review): whether `end` is inclusive or exclusive is not visible from
/// this definition; confirm against the callers.
#[derive(Debug, Clone)]
pub struct MinInterval<T: PositionType> {
/// Start position of the interval.
pub start: T,
/// End position of the interval.
pub end: T,
/// Index carried alongside the interval; used as the sort key when results are reordered.
pub idx: u32,
}

/// Interval record with a `start`/`end` position pair, a `u32` index, and a boolean strand flag.
///
/// The local-to-global mapping code pushes one of these per mapped segment and
/// sorts the collected records by `idx` before splitting them into output vectors.
///
/// NOTE(review): whether `end` is inclusive or exclusive is not visible from
/// this definition; confirm against the callers.
#[derive(Debug, Clone)]
pub struct StrandInterval<T: PositionType> {
/// Start position of the interval.
pub start: T,
/// End position of the interval.
pub end: T,
/// Index carried alongside the interval; used as the sort key when results are reordered.
pub idx: u32,
/// Strand flag. Presumably `true` means forward strand; verify against the producer.
pub fwd: bool,
}

#[derive(Debug, Clone)]
pub struct Interval<C: GroupType, T: PositionType> {
pub group: C,
Expand Down Expand Up @@ -68,7 +83,7 @@ pub struct OverlapPair {
pub idx2: u32,
}

#[derive(Debug, Clone, Hash)]
#[derive(Debug, Clone, Hash, Copy)]
pub struct Nearest<T: PositionType> {
pub distance: T,
pub idx: u32,
Expand Down Expand Up @@ -133,4 +148,12 @@ impl FromStr for OverlapType {
_ => Err("Invalid direction string"),
}
}
}


/// Plain record holding a `u32` index, a `start`/`end` position pair, and a strand flag.
///
/// Derives `Debug` and `Clone` so it can be logged and duplicated like the
/// other interval records in this module.
///
/// NOTE(review): the meaning of `strand` (presumably `true` = forward) and
/// whether `end` is inclusive are not established by this definition; confirm
/// against the code that constructs these records.
#[derive(Debug, Clone)]
pub struct SplicedRecord<T> {
    /// Index associated with the record.
    pub idx: u32,
    /// Start position.
    pub start: T,
    /// End position.
    pub end: T,
    /// Strand flag.
    pub strand: bool,
}
Loading