def feature_counts(self, reads: str, genes: str) -> DataFrame:
    """
    Count genes in a reads BAM file and return the result as a :class:`DataFrame`.

    The call is forwarded to ``featureCounts`` on ``self._jsparkSession``;
    whatever it returns is wrapped in a :class:`DataFrame` together with
    ``self._wrapped``.

    Parameters
    ----------
    reads : str
        the alignment file in BAM format
    genes : str
        genes in BED format

    Returns
    -------
    :class:`DataFrame`

    Examples
    --------
    >>> ss.feature_counts(reads, genes).show(1)
    +---------+------+---------+-------+------+------+--------+
    |sample_id|contig|pos_start|pos_end|strand|Length|countRef|
    +---------+------+---------+-------+------+------+--------+
    |        1|  chr1|       11|     99|     +|    88|       7|
    +---------+------+---------+-------+------+------+--------+
    only showing top 1 row

    """
    session = self._jsparkSession
    counts_jdf = session.featureCounts(reads, genes)
    return DataFrame(counts_jdf, self._wrapped)