From 933621e281b51dd357bbab139020983e4cab3b04 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 13 Dec 2025 21:29:00 -0800 Subject: [PATCH 1/2] Strip sqlite from txdb id's --- CHANGELOG.md | 2 +- README.md | 6 ++-- src/txdb/_ahub.py | 90 +++++++++++++++++++++++------------------------ 3 files changed, 49 insertions(+), 49 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0d5355f..21bd4cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,5 @@ # Changelog -## Version 0.0.1 +## Version 0.0.1 - 0.0.2 - Initial release of the package with class structure and basic functionality. diff --git a/README.md b/README.md index 0def9f3..fe2b0e5 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ # TxDb -This package provides a Python interface to access and manipulate genome annotations, implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as `GenomicRanges` objects. It also includes a registry system to easily download and cache standard TxDb databases. +This package provides a Python interface to access and manipulate genome annotations, implemented in the Bioconductor [GenomicFeatures](https://bioconductor.org/packages/GenomicFeatures) package. It allows users to interact with `TxDb` SQLite databases to extract genomic features such as transcripts, exons, CDS, and promoters as [GenomicRanges](https://github.com/biocpy/genomicranges) objects. It also includes a registry system to easily download and cache standard TxDb annotation files. ## Install @@ -17,7 +17,7 @@ pip install txdb ### Using TxDbRegistry -The TxDbRegistry provides easy access to hosted TxDb databases in AnnotationHub. +The TxDbRegistry provides easy access to hosted TxDb databases in [AnnotationHub](https://bioconductor.org/packages/release/bioc/html/AnnotationHub.html). ```python from txdb import TxDbRegistry @@ -30,7 +30,7 @@ print(registry.list_txdb()) # Load a specific database (downloads and caches it automatically) # Example: hg38 knownGene -txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite") +txdb = registry.load_db("TxDb.Hsapiens.UCSC.hg38.knownGene") # Access features transcripts = txdb.transcripts() diff --git a/src/txdb/_ahub.py b/src/txdb/_ahub.py index cf5bfcd..54b1256 100644 --- a/src/txdb/_ahub.py +++ b/src/txdb/_ahub.py @@ -29,183 +29,183 @@ __license__ = "MIT" TXDB_CONFIG = { - "TxDb.Athaliana.BioMart.plantsmart22.sqlite": { + "TxDb.Athaliana.BioMart.plantsmart22": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart22.sqlite", }, - "TxDb.Athaliana.BioMart.plantsmart25.sqlite": { + "TxDb.Athaliana.BioMart.plantsmart25": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart25.sqlite", }, - "TxDb.Athaliana.BioMart.plantsmart28.sqlite": { + "TxDb.Athaliana.BioMart.plantsmart28": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Athaliana.BioMart.plantsmart28.sqlite", }, - "TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite": { + "TxDb.Btaurus.UCSC.bosTau8.refGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Btaurus.UCSC.bosTau8.refGene.sqlite", }, - "TxDb.Celegans.UCSC.ce11.refGene.sqlite": { + "TxDb.Celegans.UCSC.ce11.refGene": { "release_date": "2019-05-01", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Celegans.UCSC.ce11.refGene.sqlite", }, - "TxDb.Celegans.UCSC.ce6.ensGene.sqlite": { + "TxDb.Celegans.UCSC.ce6.ensGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Celegans.UCSC.ce6.ensGene.sqlite", }, - "TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite": { + "TxDb.Cfamiliaris.UCSC.canFam3.refGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Cfamiliaris.UCSC.canFam3.refGene.sqlite", }, - "TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite": { + "TxDb.Dmelanogaster.UCSC.dm3.ensGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Dmelanogaster.UCSC.dm3.ensGene.sqlite", }, - "TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite": { + "TxDb.Dmelanogaster.UCSC.dm6.ensGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Dmelanogaster.UCSC.dm6.ensGene.sqlite", }, - "TxDb.Drerio.UCSC.danRer10.refGene.sqlite": { + "TxDb.Drerio.UCSC.danRer10.refGene": { "release_date": "2019-05-01", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer10.refGene.sqlite", }, - "TxDb.Ggallus.UCSC.galGal4.refGene.sqlite": { + "TxDb.Ggallus.UCSC.galGal4.refGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Ggallus.UCSC.galGal4.refGene.sqlite", }, - "TxDb.Hsapiens.BioMart.igis.sqlite": { + "TxDb.Hsapiens.BioMart.igis": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.BioMart.igis.sqlite", }, - "TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite": { + "TxDb.Hsapiens.UCSC.hg18.knownGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg18.knownGene.sqlite", }, - "TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite": { + "TxDb.Hsapiens.UCSC.hg19.knownGene": { "release_date": "2025-10-29", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg19.knownGene.sqlite", }, - "TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite": { + "TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Hsapiens.UCSC.hg19.lincRNAsTranscripts.sqlite", }, - "TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite": { + "TxDb.Hsapiens.UCSC.hg38.knownGene": { "release_date": "2025-10-29", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.22/TxDb.Hsapiens.UCSC.hg38.knownGene.sqlite", }, - "TxDb.Hsapiens.UCSC.hg38.refGene.sqlite": { + "TxDb.Hsapiens.UCSC.hg38.refGene": { "release_date": "2024-04-02", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Hsapiens.UCSC.hg38.refGene.sqlite", }, - "TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite": { + "TxDb.Mmulatta.UCSC.rheMac3.refGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac3.refGene.sqlite", }, - "TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite": { + "TxDb.Mmulatta.UCSC.rheMac8.refGene": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Mmulatta.UCSC.rheMac8.refGene.sqlite", }, - "TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite": { + "TxDb.Mmulatta.UCSC.rheMac10.refGene": { "release_date": "2021-10-08", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Mmulatta.UCSC.rheMac10.refGene.sqlite", }, - "TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite": { + "TxDb.Mmusculus.UCSC.mm10.ensGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm10.ensGene.sqlite", }, - "TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite": { + "TxDb.Mmusculus.UCSC.mm10.knownGene": { "release_date": "2019-05-01", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite", }, - "TxDb.Mmusculus.UCSC.mm39.refGene.sqlite": { + "TxDb.Mmusculus.UCSC.mm39.refGene": { "release_date": "2024-04-02", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.19/TxDb.Mmusculus.UCSC.mm39.refGene.sqlite", }, - "TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite": { + "TxDb.Mmusculus.UCSC.mm39.knownGene": { "release_date": "2025-03-11", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.21/TxDb.Mmusculus.UCSC.mm39.knownGene.sqlite", }, - "TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite": { + "TxDb.Mmusculus.UCSC.mm9.knownGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Mmusculus.UCSC.mm9.knownGene.sqlite", }, - "TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite": { + "TxDb.Ptroglodytes.UCSC.panTro4.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro4.refGene.sqlite", }, - "TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite": { + "TxDb.Ptroglodytes.UCSC.panTro5.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ptroglodytes.UCSC.panTro5.refGene.sqlite", }, - "TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite": { + "TxDb.Ptroglodytes.UCSC.panTro6.refGene": { "release_date": "2019-10-29", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ptroglodytes.UCSC.panTro6.refGene.sqlite", }, - "TxDb.Rnorvegicus.BioMart.igis.sqlite": { + "TxDb.Rnorvegicus.BioMart.igis": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.BioMart.igis.sqlite", }, - "TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite": { + "TxDb.Rnorvegicus.UCSC.rn4.ensGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Rnorvegicus.UCSC.rn4.ensGene.sqlite", }, - "TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite": { + "TxDb.Rnorvegicus.UCSC.rn5.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Rnorvegicus.UCSC.rn5.refGene.sqlite", }, - "TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite": { + "TxDb.Rnorvegicus.UCSC.rn6.refGene": { "release_date": "2019-05-01", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Rnorvegicus.UCSC.rn6.refGene.sqlite", }, - "TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite": { + "TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq": { "release_date": "2020-10-20", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.12/TxDb.Rnorvegicus.UCSC.rn6.ncbiRefSeq.sqlite", }, - "TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite": { + "TxDb.Rnorvegicus.UCSC.rn7.refGene": { "release_date": "2022-04-18", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Rnorvegicus.UCSC.rn7.refGene.sqlite", }, - "TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite": { + "TxDb.Scerevisiae.UCSC.sacCer2.sgdGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer2.sgdGene.sqlite", }, - "TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite": { + "TxDb.Scerevisiae.UCSC.sacCer3.sgdGene": { "release_date": "2016-12-22", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.4/TxDb.Scerevisiae.UCSC.sacCer3.sgdGene.sqlite", }, - "TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite": { + "TxDb.Sscrofa.UCSC.susScr3.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr3.refGene.sqlite", }, - "TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite": { + "TxDb.Sscrofa.UCSC.susScr11.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Sscrofa.UCSC.susScr11.refGene.sqlite", }, - "TxDb.Ggallus.UCSC.galGal5.refGene.sqlite": { + "TxDb.Ggallus.UCSC.galGal5.refGene": { "release_date": "2020-04-27", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.11/TxDb.Ggallus.UCSC.galGal5.refGene.sqlite", }, - "TxDb.Ggallus.UCSC.galGal6.refGene.sqlite": { + "TxDb.Ggallus.UCSC.galGal6.refGene": { "release_date": "2019-10-29", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.10/TxDb.Ggallus.UCSC.galGal6.refGene.sqlite", }, - "TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite": { + "TxDb.Cfamiliaris.UCSC.canFam4.refGene": { "release_date": "2021-10-08", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam4.refGene.sqlite", }, - "TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite": { + "TxDb.Cfamiliaris.UCSC.canFam5.refGene": { "release_date": "2021-10-08", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.14/TxDb.Cfamiliaris.UCSC.canFam5.refGene.sqlite", }, - "TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite": { + "TxDb.Cfamiliaris.UCSC.canFam6.refGene": { "release_date": "2023-04-06", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.17/TxDb.Cfamiliaris.UCSC.canFam6.refGene.sqlite", }, - "TxDb.Celegans.UCSC.ce11.ensGene.sqlite": { + "TxDb.Celegans.UCSC.ce11.ensGene": { "release_date": "2022-04-18", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.15/TxDb.Celegans.UCSC.ce11.ensGene.sqlite", }, - "TxDb.Drerio.UCSC.danRer11.refGene.sqlite": { + "TxDb.Drerio.UCSC.danRer11.refGene": { "release_date": "2019-05-01", "url": "https://mghp.osn.xsede.org/bir190004-bucket01/AnnotationHub/ucsc/standard/3.9/TxDb.Drerio.UCSC.danRer11.refGene.sqlite", }, From 5a49a575a3ecfb581d0b38a973d4d85d2d5ea158 Mon Sep 17 00:00:00 2001 From: Jayaram Kancherla Date: Sat, 13 Dec 2025 21:35:18 -0800 Subject: [PATCH 2/2] Fix tests --- tests/test_real.py | 2 +- tests/test_registry.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_real.py b/tests/test_real.py index 441cdd5..16aaf87 100644 --- a/tests/test_real.py +++ b/tests/test_real.py @@ -9,7 +9,7 @@ def test_real_txdb_workflow(tmp_path): registry = TxDbRegistry(cache_dir=tmp_path / "cache") - txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene.sqlite" + txdb_id = "TxDb.Celegans.UCSC.ce11.ensGene" assert txdb_id in registry.list_txdb() diff --git a/tests/test_registry.py b/tests/test_registry.py index 7b5d251..a540c5e 100644 --- a/tests/test_registry.py +++ b/tests/test_registry.py @@ -38,7 +38,7 @@ def registry(tmp_path): def test_registry_init(registry): assert isinstance(registry, TxDbRegistry) - assert "TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite" in registry.list_txdb() + assert "TxDb.Mmusculus.UCSC.mm10.knownGene" in registry.list_txdb() # @patch("txdb.txdbregistry.BiocFileCache") @@ -55,7 +55,7 @@ def test_registry_init(registry): # registry._bfc = mock_bfc # # Test load_db -# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene.sqlite") +# txdb = registry.load_db("TxDb.Mmusculus.UCSC.mm10.knownGene") # assert isinstance(txdb, TxDb) # assert txdb.dbpath == mock_db_file