Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions bench/varity/large_vcf_lift_bench.clj
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
(ns varity.large-vcf-lift-bench
(:require [libra.bench :refer [defbench is]]
[libra.criterium :as c]
[cljam.io.sequence :as cseq]
[cljam.io.vcf :as vcf]
[varity.chain :as ch]
[varity.vcf-lift :as vcf-lift]
[varity.t-common
:refer [test-ref-seq-file
test-large-vcf-file
test-large-chain-file
prepare-cavia!]]))

(defbench large-vcf-lift-bench
(prepare-cavia!)
(with-open [seq-rdr (cseq/reader test-ref-seq-file)
vcf-rdr (vcf/reader test-large-vcf-file)]
(let [chidx (ch/index (ch/load-chain test-large-chain-file))]
(is (c/quick-bench
(doall (vcf-lift/liftover-variants
seq-rdr chidx (vcf/read-variants vcf-rdr))))))))
29 changes: 14 additions & 15 deletions src/varity/chain.clj
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,9 @@
(:require [clojure.java.io :as io]
[clojure.string :as string]
[cljam.util.chromosome :refer [normalize-chromosome-key]]
[cljam.util.intervals :as intervals]
[proton.core :refer [as-long]]
[varity.util :as util])
(:import [clojure.lang Sorted]))
[varity.util :as util]))

(defn- update-multi
[m ks f]
Expand Down Expand Up @@ -81,7 +81,9 @@
(recur (->> (assoc d :t-start curr-t-start
:q-start curr-q-start
:t-end (+ curr-t-start (:size d))
:q-end (+ curr-q-start (:size d)))
:q-end (+ curr-q-start (:size d))
:start curr-t-start
:end (+ curr-t-start (:size d)))
(conj! results))
(first r)
(next r)
Expand All @@ -93,11 +95,11 @@
(:q-end header))))))

(defn- index-chain [chain]
(->> chain
cumsum-chain
(map (juxt :t-start identity))
(into (sorted-map))
(assoc chain :data)))
(assoc chain
:data
(-> chain
cumsum-chain
(intervals/index-intervals {:structure :nclist}))))

(defn index
"Creates chain index for search."
Expand All @@ -109,12 +111,11 @@
(mapv index-chain xs)])))))

;;; Search

(defn- in-block?
[pos {:keys [^Sorted data header] :as chain}]
[pos {:keys [data header] :as chain}]
(when (<= (inc (:t-start header)) pos (:t-end header))
(when-let [[start m] (first (. data seqFrom pos false))]
(when (<= start pos (dec (+ start (:size m))))
(when-let [m (first (intervals/find-overlap-intervals data nil pos (inc pos)))]
(when (<= (:t-start m) pos (dec (+ (:t-start m) (:size m))))
(assoc chain :in-block m)))))

(def ^:private normalize-chr (memoize normalize-chromosome-key))
Expand All @@ -137,6 +138,4 @@
(defn search-overlap-blocks
"Calculates a list of blocks that overlap the given interval."
[start end blocks-idx]
(->> (subseq blocks-idx <= end)
(map second)
(filter #(> (:t-end %) start))))
(intervals/find-overlap-intervals blocks-idx nil (inc start) end))
Binary file added test-resources/large.chain.gz
Binary file not shown.
Binary file added test-resources/large.vcf.gz
Binary file not shown.
40 changes: 21 additions & 19 deletions test/varity/chain_test.clj
Original file line number Diff line number Diff line change
@@ -1,14 +1,18 @@
(ns varity.chain-test
(:require [clojure.test :refer :all]
[cljam.util.intervals :as intervals]
[varity.chain :as chain]))

(def ^:private sample-indexed-block
(sorted-map
2 {:t-start 2 :t-end 5}
6 {:t-start 6 :t-end 10}
8 {:t-start 8 :t-end 12}
14 {:t-start 14 :t-end 16}
18 {:t-start 18 :t-end 20}))
(intervals/index-intervals
(map (fn [{:keys [t-start t-end] :as m}]
(assoc m :start t-start :end t-end))
[{:t-start 2 :t-end 5}
{:t-start 6 :t-end 10}
{:t-start 8 :t-end 12}
{:t-start 14 :t-end 16}
{:t-start 18 :t-end 20}])
{:structure :nclist}))

(def ^:private ^:const sample-indexed-chain
{"chr1" [{:header {:t-name "chr1", :t-start 0, :t-end 100 :score 1}}
Expand All @@ -19,19 +23,17 @@
"chr3" [{:header {:t-name "chr2", :t-start 0, :t-end 300 :score 1}}]})

(deftest search-overlap-blocks-test
(is (= (chain/search-overlap-blocks 7 12 sample-indexed-block)
[{:t-start 6, :t-end 10} {:t-start 8, :t-end 12}]))
(is (= (chain/search-overlap-blocks 15 17 sample-indexed-block)
[{:t-start 14, :t-end 16}]))
(is (= (chain/search-overlap-blocks 6 7 sample-indexed-block)
[{:t-start 6, :t-end 10}]))
(is (= (chain/search-overlap-blocks 4 4 sample-indexed-block)
[{:t-start 2, :t-end 5}]))
(is (= (chain/search-overlap-blocks 5 5 sample-indexed-block) []))
(is (= (chain/search-overlap-blocks 6 6 sample-indexed-block)
[{:t-start 6, :t-end 10}]))
(is (= (chain/search-overlap-blocks 1 1 sample-indexed-block) []))
(is (= (chain/search-overlap-blocks 30 31 sample-indexed-block) [])))
(are [start end ans] (= (map #(select-keys % [:t-start :t-end])
(chain/search-overlap-blocks start end sample-indexed-block))
ans)
7 12 [{:t-start 6, :t-end 10} {:t-start 8, :t-end 12}]
15 17 [{:t-start 14, :t-end 16}]
6 7 [{:t-start 6, :t-end 10}]
4 4 [{:t-start 2, :t-end 5}]
5 5 []
6 6 [{:t-start 6, :t-end 10}]
1 1 []
30 31 []))

(deftest search-containing-chains-test
(is (= (chain/search-containing-chains "chr1" 10 20 sample-indexed-chain)
Expand Down
4 changes: 4 additions & 0 deletions test/varity/t_common.clj
Original file line number Diff line number Diff line change
Expand Up @@ -73,3 +73,7 @@
(def test-load-refgene-file "./test-resources/test-refgene.txt")

(def test-load-refseq-file "./test-resources/test-refseq.txt")

(def test-large-chain-file "./test-resources/large.chain.gz")

(def test-large-vcf-file "./test-resources/large.vcf.gz")
4 changes: 2 additions & 2 deletions test/varity/vcf_lift_test.clj
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,14 @@
[{:header {:t-name "chr1", :t-start 0, :t-end 540, :t-size 540,
:q-name "chr1", :q-start 0, :q-end 540, :q-size 540,
:q-strand :reverse}
:data [{:chr "chr1", :size 540, :dt nil, :dq nil}]}]))
:data [{:size 540, :dt nil, :dq nil}]}]))

(def small-forward-chains
(ch/index
[{:header {:t-name "chr1", :t-start 0, :t-end 540, :t-size 540,
:q-name "chr1", :q-start 0, :q-end 540, :q-size 540,
:q-strand :forward}
:data [{:chr "chr1", :size 540, :dt nil, :dq nil}]}]))
:data [{:size 540, :dt nil, :dq nil}]}]))

(deftest small-new-interval-test
(testing "reverse"
Expand Down